Largely completed baseline version of ILDF

This commit is contained in:
Graham Nelson 2020-04-06 19:24:56 +01:00
parent aeb670fc9a
commit 37b01a8d89
29 changed files with 2170 additions and 916 deletions

View file

@ -389,7 +389,7 @@ division in the current section.
extract_mode = TRUE;
} else if ((current_paragraph) && (Regexp::match(&mr2, mr.exp[0], L"%(sample (%c+) code%)"))) {
code_lcat_for_body = TEXT_EXTRACT_LCAT;
code_pl_for_body = Languages::find_by_name(mr2.exp[0]);
code_pl_for_body = Languages::find_by_name(mr2.exp[0], W);
extract_mode = TRUE;
} else if ((current_paragraph) && (Regexp::match(&mr2, mr.exp[0], L"%(sample code%)"))) {
code_lcat_for_body = TEXT_EXTRACT_LCAT;

View file

@ -146,11 +146,11 @@ web *Reader::load_web(pathname *P, filename *alt_F, module_search *I, int verbos
W->analysed = FALSE;
W->as_ebook = NULL;
W->redirect_weaves_to = NULL;
W->main_language = Languages::default();
W->main_language = Languages::default(W);
W->no_lines = 0; W->no_paragraphs = 0;
text_stream *language_name = Bibliographic::get_datum(W->md, I"Language");
if (Str::len(language_name) > 0)
W->main_language = Languages::find_by_name(language_name);
W->main_language = Languages::find_by_name(language_name, W);
main_target = Reader::add_tangle_target(W, W->main_language);
@<Initialise the rest of the chapter structure@> =
@ -159,7 +159,7 @@ web *Reader::load_web(pathname *P, filename *alt_F, module_search *I, int verbos
C->sections = NEW_LINKED_LIST(section);
C->ch_language = W->main_language;
if (Str::len(Cm->ch_language_name) > 0)
C->ch_language = Languages::find_by_name(Cm->ch_language_name);
C->ch_language = Languages::find_by_name(Cm->ch_language_name, W);
@<Initialise the rest of the section structure@> =
S->sect_extent = 0;
@ -176,10 +176,10 @@ web *Reader::load_web(pathname *P, filename *alt_F, module_search *I, int verbos
S->owning_web = W;
S->sect_language = C->ch_language;
if (Str::len(S->md->sect_language_name) > 0)
S->sect_language = Languages::find_by_name(S->md->sect_language_name);
S->sect_language = Languages::find_by_name(S->md->sect_language_name, W);
if (Str::len(S->md->sect_independent_language) > 0) {
programming_language *pl =
Languages::find_by_name(S->md->sect_independent_language);
Languages::find_by_name(S->md->sect_independent_language, W);
S->sect_language = pl;
S->sect_target = Reader::add_tangle_target(W, pl);
} else {

View file

@ -185,7 +185,7 @@ at us; but we don't weave them into the output, that's for sure.
} else if (Regexp::match(&mr, figname, L"(%c+) as (%c+)")) {
if (S->md->using_syntax < V2_SYNTAX)
Parser::wrong_version(S->md->using_syntax, L, "[[F as L]]", V2_SYNTAX);
programming_language *pl = Languages::find_by_name(mr.exp[1]);
programming_language *pl = Languages::find_by_name(mr.exp[1], W);
Formats::figure(OUT, wv, mr.exp[0], -1, -1, pl);
} else {
Formats::figure(OUT, wv, figname, -1, -1, NULL);

View file

@ -7,7 +7,7 @@ definitions from files.
Programming languages are identified by name: for example, |C++| or |Perl|.
@ =
programming_language *Languages::find_by_name(text_stream *lname) {
programming_language *Languages::find_by_name(text_stream *lname, web *W) {
programming_language *pl;
@<If this is the name of a language already known, return that@>;
@<Read the language definition file with this name@>;
@ -23,21 +23,32 @@ programming_language *Languages::find_by_name(text_stream *lname) {
return pl;
@<Read the language definition file with this name@> =
filename *F = NULL;
if (W) {
pathname *P = Pathnames::subfolder(W->md->path_to_web, I"Private Languages");
@<Try P@>;
}
pathname *P = Languages::default_directory();
TEMPORARY_TEXT(leaf);
WRITE_TO(leaf, "%S.ildf", lname);
filename *F = Filenames::in_folder(P, leaf);
DISCARD_TEXT(leaf);
if (TextFiles::exists(F) == FALSE)
@<Try P@>;
if (F == NULL)
Errors::fatal_with_text(
"unsupported programming language '%S'", lname);
pl = Languages::read_definition(F);
@<Try P@> =
if (F == NULL) {
TEMPORARY_TEXT(leaf);
WRITE_TO(leaf, "%S.ildf", lname);
F = Filenames::in_folder(P, leaf);
DISCARD_TEXT(leaf);
if (TextFiles::exists(F) == FALSE) F = NULL;
}
@ I'm probably showing my age here.
=
programming_language *Languages::default(void) {
return Languages::find_by_name(I"C");
programming_language *Languages::default(web *W) {
return Languages::find_by_name(I"C", W);
}
void Languages::show(OUTPUT_STREAM) {
@ -210,9 +221,9 @@ declare a reserved keyword, or set a key to a value.
pl->program = Languages::new_block(NULL, WHOLE_LINE_CRULE_RUN);
state->current_block = pl->program;
} else if (Regexp::match(&mr, line, L"keyword (%C+) of (%c+?)")) {
Languages::reserved(pl, mr.exp[0], Languages::colour(mr.exp[1], tfp), tfp);
Languages::reserved(pl, Languages::text(mr.exp[0], tfp, FALSE), Languages::colour(mr.exp[1], tfp), tfp);
} else if (Regexp::match(&mr, line, L"keyword (%C+)")) {
Languages::reserved(pl, mr.exp[0], RESERVED_COLOUR, tfp);
Languages::reserved(pl, Languages::text(mr.exp[0], tfp, FALSE), RESERVED_COLOUR, tfp);
} else if (Regexp::match(&mr, line, L"(%c+) *: *(%c+?)")) {
text_stream *key = mr.exp[0], *value = Str::duplicate(mr.exp[1]);
if (Str::eq(key, I"Name")) pl->language_name = Languages::text(value, tfp, TRUE);
@ -291,6 +302,12 @@ runs of a given colour, or give an if-X-then-Y rule:
rule->execute_block =
Languages::new_block(state->current_block, CHARACTERS_CRULE_RUN);
state->current_block = rule->execute_block;
} else if (Regexp::match(&mr, line, L"characters in (%c+) {")) {
colouring_rule *rule = Languages::new_rule(state->current_block);
rule->execute_block =
Languages::new_block(state->current_block, CHARACTERS_IN_CRULE_RUN);
rule->execute_block->char_set = Languages::text(mr.exp[0], tfp, FALSE);
state->current_block = rule->execute_block;
} else if (Regexp::match(&mr, line, L"runs of (%c+) {")) {
colouring_rule *rule = Languages::new_rule(state->current_block);
int r = UNQUOTED_COLOUR;
@ -302,6 +319,16 @@ runs of a given colour, or give an if-X-then-Y rule:
rule->execute_block = Languages::new_block(state->current_block, INSTANCES_CRULE_RUN);
rule->execute_block->run_instance = Languages::text(mr.exp[0], tfp, FALSE);
state->current_block = rule->execute_block;
} else if (Regexp::match(&mr, line, L"matches of (%c+) {")) {
colouring_rule *rule = Languages::new_rule(state->current_block);
rule->execute_block = Languages::new_block(state->current_block, MATCHES_CRULE_RUN);
Languages::regexp(rule->execute_block->match_regexp_text, mr.exp[0], tfp);
state->current_block = rule->execute_block;
} else if (Regexp::match(&mr, line, L"brackets in (%c+) {")) {
colouring_rule *rule = Languages::new_rule(state->current_block);
rule->execute_block = Languages::new_block(state->current_block, BRACKETS_CRULE_RUN);
Languages::regexp(rule->execute_block->match_regexp_text, mr.exp[0], tfp);
state->current_block = rule->execute_block;
} else {
int at = -1, quoted = FALSE;
for (int i=0; i<Str::len(line)-1; i++) {
@ -329,7 +356,10 @@ represents a complete program.
@d WHOLE_LINE_CRULE_RUN -1 /* This block applies to the whole snippet being coloured */
@d CHARACTERS_CRULE_RUN -2 /* This block applies to each character in turn */
@d INSTANCES_CRULE_RUN -3 /* This block applies to each instance in turn */
@d CHARACTERS_IN_CRULE_RUN -3 /* This block applies to each character from a set in turn */
@d INSTANCES_CRULE_RUN -4 /* This block applies to each instance in turn */
@d MATCHES_CRULE_RUN -5 /* This block applies to each match against a regexp in turn */
@d BRACKETS_CRULE_RUN -6 /* This block applies to bracketed subexpressions in a regexp */
=
typedef struct colouring_language_block {
@ -337,6 +367,11 @@ typedef struct colouring_language_block {
struct colouring_language_block *parent; /* or |NULL| for the topmost one */
int run; /* one of the |*_CRULE_RUN| values, or else a colour */
struct text_stream *run_instance; /* used only for |INSTANCES_CRULE_RUN| */
struct text_stream *char_set; /* used only for |CHARACTERS_IN_CRULE_RUN| */
wchar_t match_regexp_text[MAX_ILDF_REGEXP_LENGTH]; /* used for |MATCHES_CRULE_RUN|, |BRACKETS_CRULE_RUN| */
/* workspace during painting */
struct match_results mr; /* of a regular expression */
MEMORY_MANAGEMENT
} colouring_language_block;
@ -347,6 +382,9 @@ colouring_language_block *Languages::new_block(colouring_language_block *within,
block->parent = within;
block->run = r;
block->run_instance = NULL;
block->char_set = NULL;
block->match_regexp_text[0] = 0;
block->mr = Regexp::create_mr();
return block;
}
@ -365,13 +403,18 @@ Note that rules can be unconditional, in that the premiss always passes.
@d SPACED_RULE_SUFFIX 6 /* for |spaced suffix P| */
@d OPTIONALLY_SPACED_RULE_SUFFIX 7 /* for |optionally spaced suffix P| */
@d MAX_ILDF_REGEXP_LENGTH 64
=
typedef struct colouring_rule {
/* the premiss: */
int match_colour; /* for |colour C|, or else |NOT_A_COLOUR| */
int sense; /* |FALSE| to negate the condition */
int match_colour; /* for |coloured C|, or else |NOT_A_COLOUR| */
int match_keyword_of_colour; /* for |keyword C|, or else |NOT_A_COLOUR| */
struct text_stream *match_text; /* or length 0 to mean "anything" */
int match_prefix; /* one of the |*_RULE_PREFIX| values above */
wchar_t match_regexp_text[MAX_ILDF_REGEXP_LENGTH];
int number; /* for |number N| rules; 0 for others */
/* the conclusion: */
struct colouring_language_block *execute_block; /* or |NULL|, in which case... */
@ -381,6 +424,7 @@ typedef struct colouring_rule {
/* workspace during painting */
int fix_position; /* where the prefix or suffix started */
struct match_results mr; /* of a regular expression */
MEMORY_MANAGEMENT
} colouring_rule;
@ -389,15 +433,21 @@ colouring_rule *Languages::new_rule(colouring_language_block *within) {
if (within == NULL) internal_error("rule outside block");
colouring_rule *rule = CREATE(colouring_rule);
ADD_TO_LINKED_LIST(rule, colouring_rule, within->rules);
rule->sense = TRUE;
rule->match_colour = NOT_A_COLOUR;
rule->match_text = NULL;
rule->match_prefix = NOT_A_RULE_PREFIX;
rule->match_keyword_of_colour = NOT_A_COLOUR;
rule->match_regexp_text[0] = 0;
rule->number = 0;
rule->set_to_colour = NOT_A_COLOUR;
rule->set_prefix_to_colour = NOT_A_COLOUR;
rule->execute_block = NULL;
rule->debug = FALSE;
rule->fix_position = 0;
rule->mr = Regexp::create_mr();
return rule;
}
@ -413,13 +463,21 @@ void Languages::parse_rule(language_reader_state *state, text_stream *premiss,
}
@<Parse the premiss@> =
if (Regexp::match(&mr, premiss, L"keyword of (%c+)")) {
while (Regexp::match(&mr, premiss, L"not (%c+)")) {
rule->sense = (rule->sense)?FALSE:TRUE;
Str::clear(premiss); Str::copy(premiss, mr.exp[0]);
}
if (Regexp::match(&mr, premiss, L"number (%d+)")) {
rule->number = Str::atoi(mr.exp[0], 0);
} else if (Regexp::match(&mr, premiss, L"keyword of (%c+)")) {
rule->match_keyword_of_colour = Languages::colour(mr.exp[0], tfp);
} else if (Regexp::match(&mr, premiss, L"keyword")) {
Errors::in_text_file("ambiguous: make it keyword of !reserved or \"keyword\"", tfp);
} else if (Regexp::match(&mr, premiss, L"prefix (%c+)")) {
rule->match_prefix = UNSPACED_RULE_PREFIX;
rule->match_text = Languages::text(mr.exp[0], tfp, FALSE);
} else if (Regexp::match(&mr, premiss, L"matching (%c+)")) {
Languages::regexp(rule->match_regexp_text, mr.exp[0], tfp);
} else if (Regexp::match(&mr, premiss, L"spaced prefix (%c+)")) {
rule->match_prefix = SPACED_RULE_PREFIX;
rule->match_text = Languages::text(mr.exp[0], tfp, FALSE);
@ -435,7 +493,7 @@ void Languages::parse_rule(language_reader_state *state, text_stream *premiss,
} else if (Regexp::match(&mr, premiss, L"optionally spaced suffix (%c+)")) {
rule->match_prefix = OPTIONALLY_SPACED_RULE_SUFFIX;
rule->match_text = Languages::text(mr.exp[0], tfp, FALSE);
} else if (Regexp::match(&mr, premiss, L"colou*r (%c+)")) {
} else if (Regexp::match(&mr, premiss, L"coloured (%c+)")) {
rule->match_colour = Languages::colour(mr.exp[0], tfp);
} else if (Str::len(premiss) > 0) {
rule->match_text = Languages::text(premiss, tfp, FALSE);
@ -575,6 +633,12 @@ text_stream *Languages::text(text_stream *T, text_file_position *tfp, int allow)
} else if ((bareword == FALSE) && (c == '"')) {
Errors::in_text_file(
"backslash needed before internal double-quotation mark", tfp);
} else if ((bareword) && (c == '!') && (i == from)) {
Errors::in_text_file(
"a literal starting with ! must be in double-quotation marks", tfp);
} else if ((bareword) && (c == '/')) {
Errors::in_text_file(
"forward slashes can only be used in quoted strings", tfp);
} else if ((bareword) && (c == '"')) {
Errors::in_text_file(
"double-quotation marks can only be used in quoted strings", tfp);
@ -588,6 +652,102 @@ text_stream *Languages::text(text_stream *T, text_file_position *tfp, int allow)
Errors::in_text_file_S(err, tfp);
DISCARD_TEXT(err);
}
if (bareword) {
int rw = FALSE;
if (Str::eq(V, I"both")) rw = TRUE;
if (Str::eq(V, I"brackets")) rw = TRUE;
if (Str::eq(V, I"characters")) rw = TRUE;
if (Str::eq(V, I"coloured")) rw = TRUE;
if (Str::eq(V, I"colouring")) rw = TRUE;
if (Str::eq(V, I"debug")) rw = TRUE;
if (Str::eq(V, I"false")) rw = TRUE;
if (Str::eq(V, I"in")) rw = TRUE;
if (Str::eq(V, I"instances")) rw = TRUE;
if (Str::eq(V, I"keyword")) rw = TRUE;
if (Str::eq(V, I"matches")) rw = TRUE;
if (Str::eq(V, I"matching")) rw = TRUE;
if (Str::eq(V, I"not")) rw = TRUE;
if (Str::eq(V, I"of")) rw = TRUE;
if (Str::eq(V, I"on")) rw = TRUE;
if (Str::eq(V, I"optionally")) rw = TRUE;
if (Str::eq(V, I"prefix")) rw = TRUE;
if (Str::eq(V, I"runs")) rw = TRUE;
if (Str::eq(V, I"spaced")) rw = TRUE;
if (Str::eq(V, I"suffix")) rw = TRUE;
if (Str::eq(V, I"true")) rw = TRUE;
if (Str::eq(V, I"unquoted")) rw = TRUE;
if (rw) {
TEMPORARY_TEXT(err);
WRITE_TO(err, "'%S' is a reserved word, so you should put it in double-quotation marks", V);
Errors::in_text_file_S(err, tfp);
DISCARD_TEXT(err);
}
}
}
return V;
}
@ And regular expressions.
=
void Languages::regexp(wchar_t *write_to, text_stream *T, text_file_position *tfp) {
	/* Translate the expression in |T|, which must be written between slashes, */
	/* into the buffer |write_to|, which is treated as holding at most */
	/* |MAX_ILDF_REGEXP_LENGTH| wide characters including its terminator. */
	/* Problems are reported against the file position |tfp|. On an empty |T|, */
	/* or one not delimited by slashes, the buffer is left as the empty string. */
	if (write_to == NULL) internal_error("no buffer");
	write_to[0] = 0;
	if (Str::len(T) > 0) {
		int from = 0, to = Str::len(T)-1, x = 0; /* |x| is the next free slot in |write_to| */
		if ((to > from) &&
			(Str::get_at(T, from) == '/') && (Str::get_at(T, to) == '/')) {
			from++; to--; /* step inside the delimiting slashes */
			for (int i=from; i<=to; i++) {
				wchar_t c = Str::get_at(T, i);
				if (c == '\\') {
					/* a backslash and the character after it are consumed together */
					wchar_t w = Str::get_at(T, i+1);
					if (w == '\\') {
						x = Languages::add_to_regexp(write_to, x, w); /* a literal backslash */
					} else if (w == 'd') {
						x = Languages::add_escape_to_regexp(write_to, x, 'd');
					} else if (w == 't') {
						x = Languages::add_escape_to_regexp(write_to, x, 't');
					} else if (w == 's') {
						x = Languages::add_to_regexp(write_to, x, ' '); /* |\s| becomes a plain space */
					} else if (w == 'S') {
						x = Languages::add_escape_to_regexp(write_to, x, 'C');
					} else if (w == '"') {
						x = Languages::add_escape_to_regexp(write_to, x, 'q');
					} else {
						x = Languages::add_escape_to_regexp(write_to, x, w);
					}
					i++;
					continue;
				}
				if (c == '.') {
					x = Languages::add_escape_to_regexp(write_to, x, 'c'); /* |.| becomes |%c| */
					continue;
				}
				if (c == '%') {
					x = Languages::add_escape_to_regexp(write_to, x, '%'); /* a percent sign is doubled */
					continue;
				}
				x = Languages::add_to_regexp(write_to, x, c);
			}
		} else {
			Errors::in_text_file(
				"the expression to match must be in slashes '/'", tfp);
		}
		if (x >= MAX_ILDF_REGEXP_LENGTH) {
			Errors::in_text_file(
				"the expression to match is too long", tfp);
			x = MAX_ILDF_REGEXP_LENGTH - 1; /* give up the last slot so the terminator fits */
		}
		/* Always terminate: nothing above writes a trailing zero, and the buffer */
		/* is later read as a zero-terminated wide string by the matcher. */
		write_to[x] = 0;
	}
}
int Languages::add_to_regexp(wchar_t *write_to, int i, wchar_t c) {
	/* Store |c| at position |i| of |write_to| and return the position after it; */
	/* if |i| is already at or past |MAX_ILDF_REGEXP_LENGTH|, store nothing and */
	/* return |i| unchanged. */
	if (i >= MAX_ILDF_REGEXP_LENGTH) return i;
	write_to[i] = c;
	return i + 1;
}
int Languages::add_escape_to_regexp(wchar_t *write_to, int i, wchar_t c) {
	/* Append a percent sign and then |c|, each subject to the length limit */
	/* applied by |Languages::add_to_regexp|; return the resulting position. */
	int after_percent = Languages::add_to_regexp(write_to, i, '%');
	return Languages::add_to_regexp(write_to, after_percent, c);
}

View file

@ -207,24 +207,53 @@ void Painter::execute(hash_table *HT, colouring_language_block *block, text_stre
LOOP_OVER_LINKED_LIST(rule, colouring_rule, block->rules) {
switch (block->run) {
case WHOLE_LINE_CRULE_RUN:
Painter::execute_rule(HT, rule, matter, colouring, from, to);
Painter::execute_rule(HT, rule, matter, colouring, from, to, 1);
break;
case CHARACTERS_CRULE_RUN:
for (int i=from; i<=to; i++)
Painter::execute_rule(HT, rule, matter, colouring, i, i);
Painter::execute_rule(HT, rule, matter, colouring, i, i, i-from+1);
break;
case CHARACTERS_IN_CRULE_RUN:
for (int count=1, i=from; i<=to; i++)
for (int j=0; j<Str::len(block->char_set); j++)
if (Str::get_at(matter, i) == Str::get_at(block->char_set, j) ) {
Painter::execute_rule(HT, rule, matter, colouring, i, i, count++);
break;
}
break;
case INSTANCES_CRULE_RUN: {
int L = Str::len(block->run_instance) - 1;
if (L >= 0)
for (int i=from; i<=to - L; i++)
for (int count=1, i=from; i<=to - L; i++)
if (ACMESupport::text_at(matter, i, block->run_instance)) {
Painter::execute_rule(HT, rule, matter, colouring, i, i+L);
Painter::execute_rule(HT, rule, matter, colouring, i, i+L, count++);
i += L;
}
break;
}
case MATCHES_CRULE_RUN:
for (int count=1, i=from; i<=to; i++) {
int L = Regexp::match_from(&(block->mr), matter, block->match_regexp_text, i, TRUE);
if (L > 0) {
Painter::execute_rule(HT, rule, matter, colouring, i, i+L-1, count++);
i += L-1;
}
}
break;
case BRACKETS_CRULE_RUN:
for (int i=0; i<MAX_BRACKETED_SUBEXPRESSIONS; i++)
if (block->mr.exp[i])
Str::clear(block->mr.exp[i]);
if (Regexp::match(&(block->mr), matter, block->match_regexp_text))
for (int count=1, i=0; i<MAX_BRACKETED_SUBEXPRESSIONS; i++)
if (block->mr.exp_at[i] >= 0)
Painter::execute_rule(HT, rule, matter, colouring,
block->mr.exp_at[i],
block->mr.exp_at[i] + Str::len(block->mr.exp[i])-1,
count++);
break;
default: {
int ident_from = -1;
int ident_from = -1, count = 1;
for (int i=from; i<=to; i++) {
int col = Str::get_at(colouring_at_start, i);
if ((col == block->run) ||
@ -233,12 +262,12 @@ void Painter::execute(hash_table *HT, colouring_language_block *block, text_stre
if (ident_from == -1) ident_from = i;
} else {
if (ident_from >= 0)
Painter::execute_rule(HT, rule, matter, colouring, ident_from, i-1);
Painter::execute_rule(HT, rule, matter, colouring, ident_from, i-1, count++);
ident_from = -1;
}
}
if (ident_from >= 0)
Painter::execute_rule(HT, rule, matter, colouring, ident_from, to);
Painter::execute_rule(HT, rule, matter, colouring, ident_from, to, count++);
break;
}
}
@ -250,8 +279,8 @@ void Painter::execute(hash_table *HT, colouring_language_block *block, text_stre
=
void Painter::execute_rule(hash_table *HT, colouring_rule *rule, text_stream *matter,
text_stream *colouring, int from, int to) {
if (Painter::satisfies(HT, rule, matter, colouring, from, to))
text_stream *colouring, int from, int to, int N) {
if (Painter::satisfies(HT, rule, matter, colouring, from, to, N) == rule->sense)
Painter::follow(HT, rule, matter, colouring, from, to);
}
@ -267,8 +296,13 @@ void Painter::execute_rule(hash_table *HT, colouring_rule *rule, text_stream *ma
=
int Painter::satisfies(hash_table *HT, colouring_rule *rule, text_stream *matter,
text_stream *colouring, int from, int to) {
if (Str::len(rule->match_text) > 0) {
text_stream *colouring, int from, int to, int N) {
if (rule->number > 0) {
if (rule->number != N) return FALSE;
} else if (rule->match_regexp_text[0]) {
if (Regexp::match(&(rule->mr), matter, rule->match_regexp_text) == FALSE)
return FALSE;
} else if (Str::len(rule->match_text) > 0) {
if ((rule->match_prefix == UNSPACED_RULE_PREFIX) ||
(rule->match_prefix == SPACED_RULE_PREFIX) ||
(rule->match_prefix == OPTIONALLY_SPACED_RULE_PREFIX)) {
@ -295,7 +329,11 @@ int Painter::satisfies(hash_table *HT, colouring_rule *rule, text_stream *matter
return FALSE;
rule->fix_position = pos;
} else {
if (Str::ne(matter, rule->match_text)) return FALSE;
if (Str::len(rule->match_text) != to-from+1)
return FALSE;
for (int i=from; i<=to; i++)
if (Str::get_at(matter, i) != Str::get_at(rule->match_text, i-from))
return FALSE;
}
} else if (rule->match_keyword_of_colour != NOT_A_COLOUR) {
TEMPORARY_TEXT(id);

View file

@ -1,14 +1,14 @@
Name: ACME
Details: The ACME assembly language for 6502 and related CPUs
Extension: .a
Line Comment: ;
Name: "ACME"
Details: "The ACME assembly language for 6502 and related CPUs"
Extension: ".a"
Line Comment: ";"
String Literal: "\""
String Literal Escape: \
Character Literal: '
Character Literal Escape: \
Binary Literal Prefix: %
Hexadecimal Literal Prefix: $
Negative Literal Prefix: -
String Literal Escape: "\\"
Character Literal: "'"
Character Literal Escape: "\\"
Binary Literal Prefix: "%"
Hexadecimal Literal Prefix: "$"
Negative Literal Prefix: "-"
colouring {
runs of unquoted {

View file

@ -1,34 +1,34 @@
Name: C++
Details: The C++ programming language
Extension: .cpp
Multiline Comment Open: /*
Multiline Comment Close: */
Line Comment: //
Name: "C++"
Details: "The C++ programming language"
Extension: ".cpp"
Multiline Comment Open: "/*"
Multiline Comment Close: "*/"
Line Comment: "//"
String Literal: "\""
String Literal Escape: \
Character Literal: '
Character Literal Escape: \
String Literal Escape: "\\"
Character Literal: "'"
Character Literal Escape: "\\"
C-Like: true
# C++ does in fact support octal literals, marking them as starting with an
# unnecessary initial zero. This is practically obsolete now, and in any case
# makes no difference to syntax-colouring.
Hexadecimal Literal Prefix: 0x
Binary Literal Prefix: 0b
Negative Literal Prefix: -
Hexadecimal Literal Prefix: "0x"
Binary Literal Prefix: "0b"
Negative Literal Prefix: "-"
Before Named Paragraph Expansion: \n{\n
After Named Paragraph Expansion: }\n
Start Ifdef: #ifdef %S\n
End Ifdef: #endif /* %S */\n
Start Ifndef: #ifndef %S\n
End Ifndef: #endif /* %S */\n
Before Named Paragraph Expansion: "\n{\n"
After Named Paragraph Expansion: "}\n"
Start Ifdef: "#ifdef %S\n"
End Ifdef: "#endif /* %S */\n"
Start Ifndef: "#ifndef %S\n"
End Ifndef: "#endif /* %S */\n"
Line Marker: "#line %d \"%f\"\n"
Start Definition: #define %S\s
Prolong Definition: \\\n\s\s\s\s
End Definition: \n
Start Definition: "#define %S\s"
Prolong Definition: "\\\n\s\s\s\s"
End Definition: "\n"
keyword auto
keyword break

View file

@ -1,13 +1,13 @@
Name: C
Details: The C programming language
Extension: .c
Multiline Comment Open: /*
Multiline Comment Close: */
Line Comment: //
Name: "C"
Details: "The C programming language"
Extension: ".c"
Multiline Comment Open: "/*"
Multiline Comment Close: "*/"
Line Comment: "//"
String Literal: "\""
String Literal Escape: \
Character Literal: '
Character Literal Escape: \
String Literal Escape: "\\"
Character Literal: "'"
Character Literal Escape: "\\"
C-Like: true
# C does in fact support octal literals, marking them as starting with an
@ -16,21 +16,21 @@ C-Like: true
# rejected by the C standards body as useless, but are so useful that gcc
# and clang support them anyway.
Hexadecimal Literal Prefix: 0x
Binary Literal Prefix: 0b
Negative Literal Prefix: -
Hexadecimal Literal Prefix: "0x"
Binary Literal Prefix: "0b"
Negative Literal Prefix: "-"
Before Named Paragraph Expansion: \n{\n
After Named Paragraph Expansion: }\n
Start Ifdef: #ifdef %S\n
End Ifdef: #endif /* %S */\n
Start Ifndef: #ifndef %S\n
End Ifndef: #endif /* %S */\n
Before Named Paragraph Expansion: "\n{\n"
After Named Paragraph Expansion: "}\n"
Start Ifdef: "#ifdef %S\n"
End Ifdef: "#endif /* %S */\n"
Start Ifndef: "#ifndef %S\n"
End Ifndef: "#endif /* %S */\n"
Line Marker: "#line %d \"%f\"\n"
Start Definition: #define %S\s
Prolong Definition: \\\n\s\s\s\s
End Definition: \n
Start Definition: "#define %S\s"
Prolong Definition: "\\\n\s\s\s\s"
End Definition: "\n"
keyword auto
keyword break

View file

@ -1,17 +1,44 @@
Name: ILDF
Details: The Inweb Language Definition File format
Extension: .ildf
Whole Line Comment: #
Name: "ILDF"
Details: "The Inweb Language Definition File format"
Extension: ".ildf"
Whole Line Comment: "#"
Supports Namespaces: false
String Literal: "\""
String Literal Escape: \
String Literal Escape: "\\"
keyword unquoted of !element
# Regular expressions are handled here as if character literals
Character Literal: "/"
Character Literal Escape: "\\"
keyword "both"
keyword "brackets"
keyword "characters"
keyword "coloured"
keyword "colouring"
keyword "debug"
keyword "false"
keyword "in"
keyword "instances"
keyword "keyword"
keyword "matches"
keyword "matching"
keyword "not"
keyword "of"
keyword "on"
keyword "optionally"
keyword "prefix"
keyword "runs"
keyword "spaced"
keyword "suffix"
keyword "true"
keyword "unquoted" of !element
colouring {
runs of !identifier {
prefix ! => !element
prefix "!" => !element on both
keyword of !element => !element
keyword of !reserved => !reserved
}
runs of unquoted {
instances of "=>" {
@ -24,4 +51,16 @@ colouring {
=> !reserved
}
}
characters {
# Anything left of these colours will be unquoted strings, so...
coloured !constant => !string
coloured !identifier => !string
# Regular expressions, now coloured !character, are more like functions
coloured !character => !function
}
# Detect Property: Value lines, not being fooled by a colon inside quotes
brackets in /\s*([A-Z][^"]*):.*/ {
# Uncolour only the bracketed part, i.e., the Property part
=> !plain
}
}

View file

@ -1,14 +1,14 @@
Name: InC
Details: The Inform-tools extension to the C programming language
Extension: .c
Name: "InC"
Details: "The Inform-tools extension to the C programming language"
Extension: ".c"
Supports Namespaces: true
Multiline Comment Open: /*
Multiline Comment Close: */
Line Comment: //
Multiline Comment Open: "/*"
Multiline Comment Close: "*/"
Line Comment: "//"
String Literal: "\""
String Literal Escape: \
Character Literal: '
Character Literal Escape: \
String Literal Escape: "\\"
Character Literal: "'"
Character Literal Escape: "\\"
C-Like: true
# C does in fact support octal literals, marking them as starting with an
@ -17,30 +17,28 @@ C-Like: true
# rejected by the C standards body as useless, but are so useful that gcc
# and clang support them anyway.
Hexadecimal Literal Prefix: 0x
Binary Literal Prefix: 0b
Negative Literal Prefix: -
Hexadecimal Literal Prefix: "0x"
Binary Literal Prefix: "0b"
Negative Literal Prefix: "-"
# The "shebang" routine for a language is called to add anything it wants to
# at the very top of the tangled code. (For a scripting language such as
# Perl or Python, that might be a shebang: hence the name.)
# Perl or Python, that might be a shebang: hence the name.)
# But we will use it to define the constant PLATFORM_POSIX everywhere except
# Windows. This needs to happen right at the top, because the "very early
# code" in a tangle may contain material conditional on whether it is defined.
Shebang: #ifndef PLATFORM_WINDOWS\n#define PLATFORM_POSIX\n#endif\n
Before Named Paragraph Expansion: \n{\n
After Named Paragraph Expansion: }\n
Start Ifdef: #ifdef %S\n
End Ifdef: #endif /* %S */\n
Start Ifndef: #ifndef %S\n
End Ifndef: #endif /* %S */\n
Shebang: "#ifndef PLATFORM_WINDOWS\n#define PLATFORM_POSIX\n#endif\n"
Before Named Paragraph Expansion: "\n{\n"
After Named Paragraph Expansion: "}\n"
Start Ifdef: "#ifdef %S\n"
End Ifdef: "#endif /* %S */\n"
Start Ifndef: "#ifndef %S\n"
End Ifndef: "#endif /* %S */\n"
Line Marker: "#line %d \"%f\"\n"
Start Definition: #define %S\s
Prolong Definition: \\\n\s\s\s\s
End Definition: \n
Start Definition: "#define %S\s"
Prolong Definition: "\\\n\s\s\s\s"
End Definition: "\n"
# FILE gets in even though it's not technically reserved but only a type
# name, defined in the standard C library.

View file

@ -1,22 +1,22 @@
Name: Inform 6
Details: The C-like interactive fiction language Inform 6
Extension: .i6
Line Comment: !
Name: "Inform 6"
Details: "The C-like interactive fiction language Inform 6"
Extension: ".i6"
Line Comment: "!"
String Literal: "\""
String Literal Escape: \
Character Literal: '
Character Literal Escape: \
Binary Literal Prefix: $$
Hexadecimal Literal Prefix: $
Negative Literal Prefix: -
String Literal Escape: "\\"
Character Literal: "'"
Character Literal Escape: "\\"
Binary Literal Prefix: "$$"
Hexadecimal Literal Prefix: "$"
Negative Literal Prefix: "-"
Start Definition: Constant %S =\s
End Definition: ;\n
Start Definition: "Constant %S =\s"
End Definition: ";\n"
Start Ifdef: #ifdef %S;\n
End Ifdef: #endif; ! %S\n
Start Ifndef: #ifndef %S;\n
End Ifndef: #endif; ! %S\n
Start Ifdef: "#ifdef %S;\n"
End Ifdef: "#endif; ! %S\n"
Start Ifndef: "#ifndef %S;\n"
End Ifndef: "#endif; ! %S\n"
# Reserved words:

View file

@ -1,8 +1,8 @@
Name: Inform 7
Details: The natural-language based language Inform 7
Extension: .i7x
Multiline Comment Open: [
Multiline Comment Close: ]
Name: "Inform 7"
Details: "The natural-language based language Inform 7"
Extension: ".i7x"
Multiline Comment Open: "["
Multiline Comment Close: "]"
String Literal: "\""
# This is here so that tangling the Standard Rules extension doesn't insert

View file

@ -1,3 +1,3 @@
Name: None
Details: For programs in languages not yet supported by Inweb
Extension: .txt
Name: "None"
Details: "For programs in languages not yet supported by Inweb"
Extension: ".txt"

View file

@ -1,17 +1,17 @@
Name: Perl
Details: The scripting language Perl 5
Extension: .pl
Line Comment: #
Name: "Perl"
Details: "The scripting language Perl 5"
Extension: ".pl"
Line Comment: "#"
String Literal: "\""
String Literal Escape: \
Character Literal: '
Character Literal Escape: \
String Literal Escape: "\\"
Character Literal: "'"
Character Literal Escape: "\\"
Shebang: #!/usr/bin/perl\n\n
Before Named Paragraph Expansion: \n{\n
After Named Paragraph Expansion: }\n
Start Definition: %S =
End Definition: \n;\n
Shebang: "#!/usr/bin/perl\n\n"
Before Named Paragraph Expansion: "\n{\n"
After Named Paragraph Expansion: "}\n"
Start Definition: "%S ="
End Definition: "\n;\n"
# In its usual zany way, Perl recognises the same #line syntax as C, thus in
# principle overloading its comment notation #:

View file

@ -1,6 +1,6 @@
Name: Plain Text
Details: For text files which are not programs
Extension: .txt
Name: "Plain Text"
Details: "For text files which are not programs"
Extension: ".txt"
colouring {
=> !plain

View file

@ -46,7 +46,9 @@ This section of the manual is about how to do it.
Once you have written a definition, use |-read-language L| at the command
line, where |L| is the file defining it. If you have many custom languages,
|-read-languages D| reads all of the definitions in a directory |D|.
|-read-languages D| reads all of the definitions in a directory |D|. Or, if
the language in question is really quite specific to a single web, you can
make a |Private Languages| subdirectory of the web and put it in there.
@h Structure of language definitions.
Each language is defined by a single ILDF file. ("Inweb Language Definition
@ -57,7 +59,7 @@ trailing whitespace on each line is ignored; blank lines are ignored; and
so are comments, which are lines beginning with a |#| character.
The ILD contains three sorts of thing:
(a) Properties, set by lines in the form |Name: C++|.
(a) Properties, set by lines in the form |Name: "C++"|.
(b) Keywords, set by lines in the form |keyword int|.
(c) A colouring program, introduced by |colouring {| and continuing until the
last block of it is closed with a |}|.
@ -66,15 +68,15 @@ Everything in an ILD is optional, so a minimal ILD is in principle empty. In
practice, though, every ILD should open like so:
= (sample ILDF code)
Name: C
Details: The C programming language
Extension: .c
Name: "C"
Details: "The C programming language"
Extension: ".c"
@h Properties.
Inevitably, there's a miscellaneous shopping list of these, but let's start
with the semi-compulsory ones.
|Name|. This is the one used by webs in their |Language: X| lines, and should
|Name|. This is the one used by webs in their |Language: "X"| lines, and should
match the ILD's own filename: wherever it is stored, the ILD for language |X|
should be filenamed |X.ildf|.
@ -102,9 +104,9 @@ as a pair or not at all, is the notation for multiline comments.
For example, C defines:
= (sample ILDF code)
Multiline Comment Open: /*
Multiline Comment Close: */
Line Comment: //
Multiline Comment Open: "/*"
Multiline Comment Close: "*/"
Line Comment: "//"
@ As noted, comments occur only outside of string or character literals. We
can give notations for these as follows:
@ -121,9 +123,9 @@ Here, C defines:
= (sample ILDF code)
String Literal: "\""
String Literal Escape: \
Character Literal: '
Character Literal Escape: \
String Literal Escape: "\\"
Character Literal: "'"
Character Literal Escape: "\\"
@ Next, numeric literals, like |0xFE45| in C, or |$$10011110| in Inform 6.
It's assumed that every language allows non-negative decimal numbers.
@ -136,16 +138,16 @@ are notations for non-decimal numbers, if they exist.
Here, C has:
= (sample ILDF code)
Hexadecimal Literal Prefix: 0x
Binary Literal Prefix: 0b
Negative Literal Prefix: -
Hexadecimal Literal Prefix: "0x"
Binary Literal Prefix: "0b"
Negative Literal Prefix: "-"
@ |Shebang| is used only in tangling, and is a (probably short) text added at
the very beginning of a tangled program. This is useful for scripting languages
in Unix, where the opening line must be a "shebang" indicating their language.
For example, Perl defines:
= (sample ILDF code)
Shebang: #!/usr/bin/perl\n\n
Shebang: "#!/usr/bin/perl\n\n"
=
Most languages do not have a shebang.
@ -170,8 +172,8 @@ matter added. This material is in |Before Named Paragraph Expansion| and
For C and all similar languages, we recommend this:
= (sample ILDF code)
Before Named Paragraph Expansion: \n{\n
After Named Paragraph Expansion: }\n
Before Named Paragraph Expansion: "\n{\n"
After Named Paragraph Expansion: "}\n"
=
The effect of this is to ensure that code such as:
= (not code)
@ -200,12 +202,12 @@ It can only do so if the language provides a notation for that.
continue a multiline definition (if they are allowed); and |End Definition|,
if given, places any ending notation. For example, Inform 6 defines:
= (sample ILDF code)
Start Definition: Constant %S =\s
End Definition: ;\n
Start Definition: "Constant %S =\s"
End Definition: ";\n"
=
where |%S| expands to the name of the term to be defined. Thus, we might tangle
out to:
= (sample ILDF code)
= (not code)
Constant TAXICAB = 1729;\n
=
Inweb ignores all definitions unless one of these three properties is given.
@ -216,10 +218,10 @@ makes use of this to handle code dependent on the operating system in use.
If the language supports it, the notation is in |Start Ifdef| and |End Ifdef|,
and in |Start Ifndef| and |End Ifndef|. For example, Inform 6 has:
= (sample ILDF code)
Start Ifdef: #ifdef %S;\n
End Ifdef: #endif; ! %S\n
Start Ifndef: #ifndef %S;\n
End Ifndef: #endif; ! %S\n
Start Ifdef: "#ifdef %S;\n"
End Ifdef: "#endif; ! %S\n"
Start Ifndef: "#ifndef %S;\n"
End Ifndef: "#endif; ! %S\n"
=
which is a subtly different notation from the C one. Again, |%S| expands to
the name of the term we are conditionally compiling on.
@ -310,88 +312,215 @@ block, that's a line of source code. Blocks normally contain one or more
"rules":
= (sample ILDF code)
colouring {
marble => !extract
marble => !function
}
=
Rules take the form of "if X, then Y", and the |=>| divides the X from the Y.
This one says that if the snippet consists of the word "marble", then colour
it |!extract|. Of course this is not very useful, since it would only catch
it |!function|. Of course this is not very useful, since it would only catch
lines containing only that one word. So we really want to narrow in on smaller
snippets:
snippets. This, for example, applies its rule to each individual character
in turn:
= (sample ILDF code)
colouring {
characters {
X => !extract
K => !identifier
}
}
=
The effect of the |characters {| ... |}| block is to apply all its rules to
each character of the snippet owning it. Inside the block, then, the snippet
is always just a single character, and our rule tells us to paint the letter K
wherever it occurs.
@ The block |instances of X| narrows in on each usage of the text |X| inside
@ In the above examples, |K| and |marble| appeared without quotation marks,
but they were only allowed to do that because (a) they were single words,
(b) those words had no other meaning, and (c) they didn't contain any
awkward characters. For any more complicated texts, always use quotation
marks. For example, in
= (sample ILDF code)
"=>" => !reserved
=
the |=>| in quotes is just text, whereas the one outside quotes is being
used to divide a rule.
If you need a literal double quote inside the double-quotes, use |\"|; and
use |\\| for a literal backslash. For example:
= (sample ILDF code)
"\\\"" => !reserved
=
actually matches the text |\"|.
@h The six splits.
|characters| is an example of a "split", which splits up the original snippet
of text -- say, the line |let K = 2| -- into smaller, non-overlapping snippets
-- in this case, nine of them: |l|, |e|, |t|, | |, |K|, | |, |=|, | |, and |2|.
Every split is followed by a block of rules, which is applied to each of the
pieces in turn. Inweb works sideways-first: thus, if the block contains rules
R1, R2, ..., then R1 is applied to each piece first, then R2 to each piece,
and so on.
There are several different ways to split, all of them written in the
plural, to emphasize that they work on what are usually multiple things.
Rules, on the other hand, are written in the singular. Splits are not allowed
to be followed by |=>|: they always begin a block.
1. |characters| splits the snippet into each of its characters.
2. |characters in T| splits the snippet into each of its characters which
lie inside the text |T|. For example, here is a not very useful ILD for
plain text in which all vowels are in red:
[[../Private Languages/VowelsExample.ildf as ILDF]]
Given the text:
= (not code)
A noir, E blanc, I rouge, U vert, O bleu : voyelles,
Je dirai quelque jour vos naissances latentes :
A, noir corset velu des mouches éclatantes
Qui bombinent autour des puanteurs cruelles,
=
this produces:
= (sample VowelsExample code)
A noir, E blanc, I rouge, U vert, O bleu : voyelles,
Je dirai quelque jour vos naissances latentes :
A, noir corset velu des mouches éclatantes
Qui bombinent autour des puanteurs cruelles,
=
3. The split |instances of X| narrows in on each usage of the text |X| inside
the snippet. For example,
= (sample ILDF code)
colouring {
instances of == {
=> !reserved
}
}
[[../Private Languages/LineageExample.ildf as ILDF]]
acts on the text:
= (not code)
Jacob first appears in the Book of Genesis, the son of Isaac and Rebecca, the
grandson of Abraham, Sarah and Bethuel, the nephew of Ishmael.
=
gives every usage of |==| the colour |!reserved|. Note that it never runs in
an overlapping way: the snippet |===| would be considered as having only one
instance of |==| (the first two characters), while |====| would have two.
to produce:
= (sample LineageExample code)
Jacob first appears in the Book of Genesis, the son of Isaac and Rebecca, the
grandson of Abraham, Sarah and Bethuel, the nephew of Ishmael.
=
Note that it never runs in an overlapping way: the snippet |===| would be
considered as having only one instance of |==| (the first two characters),
while |====| would have two.
@ Another kind of block is |runs of C|, where |C| is a colour. For example:
= (sample ILDF code)
colouring {
runs of !identifier {
printf => !function
sscanf => !function
}
}
4. The split |runs of C|, where |C| describes a colour, splits the snippet
into non-overlapping contiguous pieces which have that colour. For example:
[[../Private Languages/RunningExample.ildf as ILDF]]
acts on:
= (not code)
Napoleon Bonaparte (1769-1821) took 167 scientists to Egypt in 1798,
who published their so-called Memoirs over the period 1798-1801.
=
If this runs on the line |if (x == 1) printf("Hello!");|, then the inner
block will run three times: its snippet will be |if|, then |x|, then |printf|.
The rules inside the block will take effect only on the third time, when it
will paint the word |printf| in |!function| colour.
to produce:
= (sample RunningExample code)
Napoleon Bonaparte (1769-1821) took 167 scientists to Egypt in 1798,
who published their so-called Memoirs over the period 1798-1801.
=
Here the hyphens in number ranges have been coloured, but not the hyphen
in "so-called".
A more computer-science sort of example would be:
[[../Private Languages/StdioExample.ildf as ILDF]]
which acts on:
= (not code)
if (x == 1) printf("Hello!");
=
to produce:
= (sample StdioExample code)
if (x == 1) printf("Hello!");
=
The split divides the line up into three runs, and the inner block runs three
times: on |if|, then |x|, then |printf|. Only the third time has any effect.
As a special form, |runs of unquoted| means "runs of characters not painted
either with |!string| or |!character|". This is special because |unquoted| is
not a colour.
@ It remains to specify what rules can do. As noted, they take the form
"if X, then Y". The following are the possibilities for X, the condition:
5. The split |matches of /E/|, where |/E/| is a regular expression (see below),
splits the snippet up into non-overlapping pieces which match it: possibly
none at all, of course, in which case the block of rules is never used.
This is easier to demonstrate than explain:
[[../Private Languages/AssemblageExample.ildf as ILDF]]
which acts on:
= (not code)
JSR .initialise
LDR A, #.data
RTS
.initialise
TAX
=
to produce:
= (sample AssemblageExample code)
JSR .initialise
LDR A, #.data
RTS
.initialise
TAX
=
1. X can be omitted altogether, and then the rule always applies. For example,
this somewhat nihilistic program gets rid of colouring entirely:
6. Lastly, the split |brackets in /E/| matches the snippet against the
regular expression |E|, and then runs the rules on each bracketed
subexpression in turn. (If there is no match, or there are no bracketed
terms in |E|, nothing happens.)
[[../Private Languages/EquationsExample.ildf as ILDF]]
acts on:
= (not code)
A = 2716
B=3
C =715 + B
D < 14
=
to produce:
= (sample EquationsExample code)
A = 2716
B=3
C =715 + B
D < 14
=
What happens here is that the expression has two bracketed terms, one for
the letter, one for the number; the rule is run first on the letter, then
on the number, and both are turned to |!function|.
@h The seven ways rules can apply.
Rules are the lines with a |=>| in. As noted, they take the form "if X, then
Y". The following are the possibilities for X, the condition.
1. The easiest thing is to give nothing at all, and then the rule always
applies. For example, this somewhat nihilistic program gets rid of colouring
entirely:
= (sample ILDF code)
colouring {
=> !plain
}
=
2. X can require the whole snippet to be of a particular colour, by writing
|colour C|. For example:
2. If X is a piece of literal text, the rule applies when the snippet is
exactly that text. For example,
= (sample ILDF code)
printf => !function
=
3. X can require the whole snippet to be of a particular colour, by writing
|coloured C|. For example:
= (sample ILDF code)
colouring {
characters {
colour !character => !plain
coloured !character => !plain
}
}
=
removes the syntax colouring on character literals.
3. X can require the snippet to be one of the language's known keywords, as
4. X can require the snippet to be one of the language's known keywords, as
declared earlier in the ILD by a |keyword| command. The syntax here is
|keyword of C|, where |C| is a colour. For example:
= (sample ILDF code)
keyword of !element => !element
=
says: if the snippet is a keyword declared as being of colour |!element|,
then actually colour it that way.
then actually colour it that way. (This is much faster than making many
comparison rules in a row, one for each keyword in the language; Inweb has
put all of the registered keywords into a hash table for rapid lookup.)
4. X can look at a little context before or after the snippet, testing it
5. X can look at a little context before or after the snippet, testing it
with one of the following: |prefix P|, |spaced prefix P|,
|optionally spaced prefix P|. These qualifiers have to do with whether white
space must appear after |P| and before the snippet. For example,
@ -403,12 +532,69 @@ space must appear after |P| and before the snippet. For example,
means that any identifier occurring after a |->| token will be coloured
as |!element|. Similarly for |suffix|.
5. And otherwise X is literal text, and the rule applies if and only if
the snippet is exactly that text. For example,
6. X can test the snippet against a regular expression, with |matching /E/|.
For example:
= (sample ILDF code)
printf => !function
runs of !identifier {
matching /.*x.*/ => !element
}
=
...turns any identifier containing a lower-case |x| into |!element| colour.
Note that |matching /x/| would not have worked, because our regular expression
is required to match the entire snippet, not just somewhere inside.
= (sample ILDF code)
characters in "0123456789" {
matching /\d\d\d\d/ => !element
}
=
...colours all four-digit numbers, but no others.
@ Now let's look at the conclusion Y of a rule. Here the possibilities are
7. Whenever a split takes place, Inweb keeps count of how many pieces there are,
and different rules can apply to differently numbered pieces. The notation
is |number N|, where |N| is the number, counting from 1. For example,
[[../Private Languages/ThirdExample.ildf as ILDF]]
acts on:
= (not code)
With how sad steps, O Moon, thou climb'st the skies!
How silently, and with how wan a face!
What, may it be that even in heav'nly place
That busy archer his sharp arrows tries!
Sure, if that long-with love-acquainted eyes
Can judge of love, thou feel'st a lover's case,
I read it in thy looks; thy languish'd grace
To me, that feel the like, thy state descries.
Then, ev'n of fellowship, O Moon, tell me,
Is constant love deem'd there but want of wit?
Are beauties there as proud as here they be?
Do they above love to be lov'd, and yet
Those lovers scorn whom that love doth possess?
Do they call virtue there ungratefulness?
=
to produce:
= (sample ThirdExample code)
With how sad steps, O Moon, thou climb'st the skies!
How silently, and with how wan a face!
What, may it be that even in heav'nly place
That busy archer his sharp arrows tries!
Sure, if that long-with love-acquainted eyes
Can judge of love, thou feel'st a lover's case,
I read it in thy looks; thy languish'd grace
To me, that feel the like, thy state descries.
Then, ev'n of fellowship, O Moon, tell me,
Is constant love deem'd there but want of wit?
Are beauties there as proud as here they be?
Do they above love to be lov'd, and yet
Those lovers scorn whom that love doth possess?
Do they call virtue there ungratefulness?
=
@ Any condition can be reversed by preceding it with |not|. For example,
= (sample ILDF code)
not coloured !string => !plain
=
@h The three ways rules can take effect.
Now let's look at the conclusion Y of a rule. Here the possibilities are
simpler:
1. If Y is the name of a colour, the snippet is painted in that colour.
@ -429,17 +615,17 @@ rules (see above), it can also be applied to the prefix or suffix: use
the notation |=> C on both| or |=> C on suffix| or |=> C on prefix|.
3. If Y is the word |debug|, then the current snippet and its colouring
are printed out on the command line.
@ The syntax of ILDs tends to avoid superfluous quotation marks as confusing,
but sometimes you need to be pedantic. If you want to match the text |=>|,
for example, that could lead to ambiguity with the rule marker |=>|. For
such occasions, simply put the text in double quotes, and change any literal
double quote in it to |\"|, and use |\\| for a literal backslash. For example:
are printed out on the command line. Thus:
= (sample ILDF code)
"keyword" => !reserved
colouring {
matches of /\d\S+/ {
=> debug
}
}
=
The rule |=> debug| is unconditional, and will print whenever it's reached.
@h Example.
@h The worm, Ouroboros.
Inweb Language Definition Format is a kind of language in itself, and in
fact Inweb is supplied with an ILD for ILDF itself, which Inweb used to
syntax-colour the examples above. Here it is, as syntax-coloured by itself:

View file

@ -0,0 +1,6 @@
Name: "AssemblageExample"
colouring {
matches of /\.[A-Za-z_][A-Za-z_0-9]*/ {
=> !function
}
}

View file

@ -0,0 +1,7 @@
Name: "EquationsExample"
colouring {
=> !plain
brackets in /.*?([A-Z])\s*=\s*(\d+).*/ {
=> !function
}
}

View file

@ -0,0 +1,7 @@
Name: "LineageExample"
colouring {
=> !plain
instances of "son" {
=> !function
}
}

View file

@ -0,0 +1,10 @@
Name: "RunningExample"
colouring {
=> !plain
characters in "0123456789" {
=> !function
}
runs of !plain {
"-" => !function
}
}

View file

@ -0,0 +1,7 @@
Name: "StdioExample"
colouring {
runs of !identifier {
printf => !function
sscanf => !function
}
}

View file

@ -0,0 +1,7 @@
Name: "ThirdExample"
colouring {
=> !plain
matches of /\S+/ {
number 3 => !function
}
}

View file

@ -0,0 +1,7 @@
Name: "VowelsExample"
colouring {
=> !plain
characters in "AEIOUaeiou" {
=> !function
}
}

File diff suppressed because it is too large Load diff

View file

@ -469,8 +469,9 @@ says <code class="display"><span class="extract">q</span></code>, the only match
</li><li>(e) <code class="display"><span class="extract">%i</span></code> means any character from the identifier class (see above);
</li><li>(f) <code class="display"><span class="extract">%p</span></code> means any character which can be used in the name of a Preform
nonterminal, which is to say, an identifier character or a hyphen;
</li><li>(g) <code class="display"><span class="extract">%P</span></code> means the same or else a colon.
</li><li>(h) <code class="display"><span class="extract">%t</span></code> means a tab.
</li><li>(g) <code class="display"><span class="extract">%P</span></code> means the same or else a colon;
</li><li>(h) <code class="display"><span class="extract">%t</span></code> means a tab;
</li><li>(i) <code class="display"><span class="extract">%q</span></code> means a double-quote.
</li></ul>
<p class="inwebparagraph"><code class="display"><span class="extract">%</span></code> otherwise makes a literal escape; a space means any whitespace character;
square brackets enclose literal alternatives, and note as usual with grep

View file

@ -524,6 +524,7 @@ little context before it (where available).
<span class="definitionkeyword">define</span> <span class="constant">UNSPACED_RULE_SUFFIX</span><span class="plain"> </span><span class="constant">5</span><span class="plain"> </span><span class="comment">for <code class="display"><span class="extract">suffix P</span></code></span>
<span class="definitionkeyword">define</span> <span class="constant">SPACED_RULE_SUFFIX</span><span class="plain"> </span><span class="constant">6</span><span class="plain"> </span><span class="comment">for <code class="display"><span class="extract">spaced suffix P</span></code></span>
<span class="definitionkeyword">define</span> <span class="constant">OPTIONALLY_SPACED_RULE_SUFFIX</span><span class="plain"> </span><span class="constant">7</span><span class="plain"> </span><span class="comment">for <code class="display"><span class="extract">optionally spaced suffix P</span></code></span>
<span class="definitionkeyword">define</span> <span class="constant">MAX_ILDF_REGEXP_LENGTH</span><span class="plain"> </span><span class="constant">64</span>
</pre>
<pre class="display">
@ -533,6 +534,7 @@ little context before it (where available).
<span class="reserved">int</span><span class="plain"> </span><span class="identifier">match_keyword_of_colour</span><span class="plain">; </span><span class="comment">for <code class="display"><span class="extract">keyword C</span></code>, or else <code class="display"><span class="extract">NOT_A_COLOUR</span></code></span>
<span class="reserved">struct</span><span class="plain"> </span><span class="reserved">text_stream</span><span class="plain"> *</span><span class="identifier">match_text</span><span class="plain">; </span><span class="comment">or length 0 to mean "anything"</span>
<span class="reserved">int</span><span class="plain"> </span><span class="identifier">match_prefix</span><span class="plain">; </span><span class="comment">one of the <code class="display"><span class="extract">*_RULE_PREFIX</span></code> values above</span>
<span class="identifier">wchar_t</span><span class="plain"> </span><span class="identifier">match_regexp_text</span><span class="plain">[</span><span class="constant">MAX_ILDF_REGEXP_LENGTH</span><span class="plain">];</span>
<span class="comment">the conclusion:</span>
<span class="reserved">struct</span><span class="plain"> </span><span class="reserved">colouring_language_block</span><span class="plain"> *</span><span class="identifier">execute_block</span><span class="plain">; </span><span class="comment">or <code class="display"><span class="extract">NULL</span></code>, in which case...</span>
@ -542,6 +544,7 @@ little context before it (where available).
<span class="comment">workspace during painting</span>
<span class="reserved">int</span><span class="plain"> </span><span class="identifier">fix_position</span><span class="plain">; </span><span class="comment">where the prefix or suffix started</span>
<span class="reserved">struct</span><span class="plain"> </span><span class="reserved">match_results</span><span class="plain"> </span><span class="identifier">mr</span><span class="plain">; </span><span class="comment">of a regular expression</span>
<span class="constant">MEMORY_MANAGEMENT</span>
<span class="plain">} </span><span class="reserved">colouring_rule</span><span class="plain">;</span>
</pre>
@ -562,11 +565,15 @@ little context before it (where available).
<span class="identifier">rule</span><span class="plain">-&gt;</span><span class="element">match_text</span><span class="plain"> = </span><span class="identifier">NULL</span><span class="plain">;</span>
<span class="identifier">rule</span><span class="plain">-&gt;</span><span class="element">match_prefix</span><span class="plain"> = </span><span class="constant">NOT_A_RULE_PREFIX</span><span class="plain">;</span>
<span class="identifier">rule</span><span class="plain">-&gt;</span><span class="element">match_keyword_of_colour</span><span class="plain"> = </span><span class="constant">NOT_A_COLOUR</span><span class="plain">;</span>
<span class="identifier">rule</span><span class="plain">-&gt;</span><span class="element">match_regexp_text</span><span class="plain">[0] = </span><span class="constant">0</span><span class="plain">;</span>
<span class="identifier">rule</span><span class="plain">-&gt;</span><span class="element">set_to_colour</span><span class="plain"> = </span><span class="constant">NOT_A_COLOUR</span><span class="plain">;</span>
<span class="identifier">rule</span><span class="plain">-&gt;</span><span class="element">set_prefix_to_colour</span><span class="plain"> = </span><span class="constant">NOT_A_COLOUR</span><span class="plain">;</span>
<span class="identifier">rule</span><span class="plain">-&gt;</span><span class="element">execute_block</span><span class="plain"> = </span><span class="identifier">NULL</span><span class="plain">;</span>
<span class="identifier">rule</span><span class="plain">-&gt;</span><span class="element">debug</span><span class="plain"> = </span><span class="constant">FALSE</span><span class="plain">;</span>
<span class="identifier">rule</span><span class="plain">-&gt;</span><span class="element">fix_position</span><span class="plain"> = </span><span class="constant">0</span><span class="plain">;</span>
<span class="identifier">rule</span><span class="plain">-&gt;</span><span class="element">mr</span><span class="plain"> = </span><span class="functiontext">Regexp::create_mr</span><span class="plain">();</span>
<span class="reserved">return</span><span class="plain"> </span><span class="identifier">rule</span><span class="plain">;</span>
<span class="plain">}</span>
</pre>
@ -607,6 +614,8 @@ little context before it (where available).
<span class="plain">} </span><span class="reserved">else</span><span class="plain"> </span><span class="reserved">if</span><span class="plain"> (</span><span class="functiontext">Regexp::match</span><span class="plain">(&amp;</span><span class="identifier">mr</span><span class="plain">, </span><span class="identifier">premiss</span><span class="plain">, </span><span class="identifier">L</span><span class="string">"prefix (%c+)"</span><span class="plain">)) {</span>
<span class="identifier">rule</span><span class="plain">-&gt;</span><span class="element">match_prefix</span><span class="plain"> = </span><span class="constant">UNSPACED_RULE_PREFIX</span><span class="plain">;</span>
<span class="identifier">rule</span><span class="plain">-&gt;</span><span class="element">match_text</span><span class="plain"> = </span><span class="functiontext">Languages::text</span><span class="plain">(</span><span class="identifier">mr</span><span class="plain">.</span><span class="element">exp</span><span class="plain">[0], </span><span class="identifier">tfp</span><span class="plain">, </span><span class="constant">FALSE</span><span class="plain">);</span>
<span class="plain">} </span><span class="reserved">else</span><span class="plain"> </span><span class="reserved">if</span><span class="plain"> (</span><span class="functiontext">Regexp::match</span><span class="plain">(&amp;</span><span class="identifier">mr</span><span class="plain">, </span><span class="identifier">premiss</span><span class="plain">, </span><span class="identifier">L</span><span class="string">"match (%c+)"</span><span class="plain">)) {</span>
<span class="functiontext">Languages::regexp</span><span class="plain">(</span><span class="identifier">rule</span><span class="plain">-&gt;</span><span class="element">match_regexp_text</span><span class="plain">, </span><span class="identifier">mr</span><span class="plain">.</span><span class="element">exp</span><span class="plain">[0], </span><span class="identifier">tfp</span><span class="plain">);</span>
<span class="plain">} </span><span class="reserved">else</span><span class="plain"> </span><span class="reserved">if</span><span class="plain"> (</span><span class="functiontext">Regexp::match</span><span class="plain">(&amp;</span><span class="identifier">mr</span><span class="plain">, </span><span class="identifier">premiss</span><span class="plain">, </span><span class="identifier">L</span><span class="string">"spaced prefix (%c+)"</span><span class="plain">)) {</span>
<span class="identifier">rule</span><span class="plain">-&gt;</span><span class="element">match_prefix</span><span class="plain"> = </span><span class="constant">SPACED_RULE_PREFIX</span><span class="plain">;</span>
<span class="identifier">rule</span><span class="plain">-&gt;</span><span class="element">match_text</span><span class="plain"> = </span><span class="functiontext">Languages::text</span><span class="plain">(</span><span class="identifier">mr</span><span class="plain">.</span><span class="element">exp</span><span class="plain">[0], </span><span class="identifier">tfp</span><span class="plain">, </span><span class="constant">FALSE</span><span class="plain">);</span>
@ -823,6 +832,81 @@ literal backslash.
<p class="endnote">The function Languages::text is used in <a href="#SP7_1">&#167;7.1</a>, <a href="#SP7_2">&#167;7.2</a>, <a href="#SP12_1">&#167;12.1</a>.</p>
<p class="inwebparagraph"><a id="SP17"></a><b>&#167;17. </b>And regular expressions.
</p>
<pre class="display">
<span class="reserved">void</span><span class="plain"> </span><span class="functiontext">Languages::regexp</span><span class="plain">(</span><span class="identifier">wchar_t</span><span class="plain"> *</span><span class="identifier">write_to</span><span class="plain">, </span><span class="reserved">text_stream</span><span class="plain"> *</span><span class="identifier">T</span><span class="plain">, </span><span class="reserved">text_file_position</span><span class="plain"> *</span><span class="identifier">tfp</span><span class="plain">) {</span>
<span class="reserved">if</span><span class="plain"> (</span><span class="identifier">write_to</span><span class="plain"> == </span><span class="identifier">NULL</span><span class="plain">) </span><span class="identifier">internal_error</span><span class="plain">(</span><span class="string">"no buffer"</span><span class="plain">);</span>
<span class="identifier">write_to</span><span class="plain">[0] = </span><span class="constant">0</span><span class="plain">;</span>
<span class="reserved">if</span><span class="plain"> (</span><span class="functiontext">Str::len</span><span class="plain">(</span><span class="identifier">T</span><span class="plain">) &gt; </span><span class="constant">0</span><span class="plain">) {</span>
<span class="reserved">int</span><span class="plain"> </span><span class="identifier">from</span><span class="plain"> = </span><span class="constant">0</span><span class="plain">, </span><span class="identifier">to</span><span class="plain"> = </span><span class="functiontext">Str::len</span><span class="plain">(</span><span class="identifier">T</span><span class="plain">)-1, </span><span class="identifier">x</span><span class="plain"> = </span><span class="constant">0</span><span class="plain">;</span>
<span class="reserved">if</span><span class="plain"> ((</span><span class="identifier">to</span><span class="plain"> &gt; </span><span class="identifier">from</span><span class="plain">) &amp;&amp;</span>
<span class="plain">(</span><span class="functiontext">Str::get_at</span><span class="plain">(</span><span class="identifier">T</span><span class="plain">, </span><span class="identifier">from</span><span class="plain">) == </span><span class="character">'/'</span><span class="plain">) &amp;&amp; (</span><span class="functiontext">Str::get_at</span><span class="plain">(</span><span class="identifier">T</span><span class="plain">, </span><span class="identifier">to</span><span class="plain">) == </span><span class="character">'/'</span><span class="plain">)) {</span>
<span class="identifier">from</span><span class="plain">++; </span><span class="identifier">to</span><span class="plain">--;</span>
<span class="reserved">for</span><span class="plain"> (</span><span class="reserved">int</span><span class="plain"> </span><span class="identifier">i</span><span class="plain">=</span><span class="identifier">from</span><span class="plain">; </span><span class="identifier">i</span><span class="plain">&lt;=</span><span class="identifier">to</span><span class="plain">; </span><span class="identifier">i</span><span class="plain">++) {</span>
<span class="identifier">wchar_t</span><span class="plain"> </span><span class="identifier">c</span><span class="plain"> = </span><span class="functiontext">Str::get_at</span><span class="plain">(</span><span class="identifier">T</span><span class="plain">, </span><span class="identifier">i</span><span class="plain">);</span>
<span class="reserved">if</span><span class="plain"> (</span><span class="identifier">c</span><span class="plain"> == </span><span class="character">'\\'</span><span class="plain">) {</span>
<span class="identifier">wchar_t</span><span class="plain"> </span><span class="identifier">w</span><span class="plain"> = </span><span class="functiontext">Str::get_at</span><span class="plain">(</span><span class="identifier">T</span><span class="plain">, </span><span class="identifier">i</span><span class="plain">+1);</span>
<span class="reserved">if</span><span class="plain"> (</span><span class="identifier">w</span><span class="plain"> == </span><span class="character">'\\'</span><span class="plain">) {</span>
<span class="identifier">x</span><span class="plain"> = </span><span class="functiontext">Languages::add_to_regexp</span><span class="plain">(</span><span class="identifier">write_to</span><span class="plain">, </span><span class="identifier">x</span><span class="plain">, </span><span class="identifier">w</span><span class="plain">);</span>
<span class="plain">} </span><span class="reserved">else</span><span class="plain"> </span><span class="reserved">if</span><span class="plain"> (</span><span class="identifier">w</span><span class="plain"> == </span><span class="character">'d'</span><span class="plain">) {</span>
<span class="identifier">x</span><span class="plain"> = </span><span class="functiontext">Languages::add_escape_to_regexp</span><span class="plain">(</span><span class="identifier">write_to</span><span class="plain">, </span><span class="identifier">x</span><span class="plain">, </span><span class="character">'d'</span><span class="plain">);</span>
<span class="plain">} </span><span class="reserved">else</span><span class="plain"> </span><span class="reserved">if</span><span class="plain"> (</span><span class="identifier">w</span><span class="plain"> == </span><span class="character">'t'</span><span class="plain">) {</span>
<span class="identifier">x</span><span class="plain"> = </span><span class="functiontext">Languages::add_escape_to_regexp</span><span class="plain">(</span><span class="identifier">write_to</span><span class="plain">, </span><span class="identifier">x</span><span class="plain">, </span><span class="character">'t'</span><span class="plain">);</span>
<span class="plain">} </span><span class="reserved">else</span><span class="plain"> </span><span class="reserved">if</span><span class="plain"> (</span><span class="identifier">w</span><span class="plain"> == </span><span class="character">'s'</span><span class="plain">) {</span>
<span class="identifier">x</span><span class="plain"> = </span><span class="functiontext">Languages::add_to_regexp</span><span class="plain">(</span><span class="identifier">write_to</span><span class="plain">, </span><span class="identifier">x</span><span class="plain">, </span><span class="character">' '</span><span class="plain">);</span>
<span class="plain">} </span><span class="reserved">else</span><span class="plain"> </span><span class="reserved">if</span><span class="plain"> (</span><span class="identifier">w</span><span class="plain"> == </span><span class="character">'S'</span><span class="plain">) {</span>
<span class="identifier">x</span><span class="plain"> = </span><span class="functiontext">Languages::add_escape_to_regexp</span><span class="plain">(</span><span class="identifier">write_to</span><span class="plain">, </span><span class="identifier">x</span><span class="plain">, </span><span class="character">'C'</span><span class="plain">);</span>
<span class="plain">} </span><span class="reserved">else</span><span class="plain"> </span><span class="reserved">if</span><span class="plain"> (</span><span class="identifier">w</span><span class="plain"> == </span><span class="character">'"'</span><span class="plain">) {</span>
<span class="identifier">x</span><span class="plain"> = </span><span class="functiontext">Languages::add_escape_to_regexp</span><span class="plain">(</span><span class="identifier">write_to</span><span class="plain">, </span><span class="identifier">x</span><span class="plain">, </span><span class="character">'q'</span><span class="plain">);</span>
<span class="plain">} </span><span class="reserved">else</span><span class="plain"> {</span>
<span class="identifier">x</span><span class="plain"> = </span><span class="functiontext">Languages::add_escape_to_regexp</span><span class="plain">(</span><span class="identifier">write_to</span><span class="plain">, </span><span class="identifier">x</span><span class="plain">, </span><span class="identifier">w</span><span class="plain">);</span>
<span class="plain">}</span>
<span class="identifier">i</span><span class="plain">++;</span>
<span class="reserved">continue</span><span class="plain">;</span>
<span class="plain">}</span>
<span class="reserved">if</span><span class="plain"> (</span><span class="identifier">c</span><span class="plain"> == </span><span class="character">'.'</span><span class="plain">) {</span>
<span class="identifier">x</span><span class="plain"> = </span><span class="functiontext">Languages::add_escape_to_regexp</span><span class="plain">(</span><span class="identifier">write_to</span><span class="plain">, </span><span class="identifier">x</span><span class="plain">, </span><span class="character">'c'</span><span class="plain">);</span>
<span class="reserved">continue</span><span class="plain">;</span>
<span class="plain">}</span>
<span class="reserved">if</span><span class="plain"> (</span><span class="identifier">c</span><span class="plain"> == </span><span class="character">'%'</span><span class="plain">) {</span>
<span class="identifier">x</span><span class="plain"> = </span><span class="functiontext">Languages::add_escape_to_regexp</span><span class="plain">(</span><span class="identifier">write_to</span><span class="plain">, </span><span class="identifier">x</span><span class="plain">, </span><span class="character">'%'</span><span class="plain">);</span>
<span class="reserved">continue</span><span class="plain">;</span>
<span class="plain">}</span>
<span class="identifier">x</span><span class="plain"> = </span><span class="functiontext">Languages::add_to_regexp</span><span class="plain">(</span><span class="identifier">write_to</span><span class="plain">, </span><span class="identifier">x</span><span class="plain">, </span><span class="identifier">c</span><span class="plain">);</span>
<span class="plain">}</span>
<span class="plain">} </span><span class="reserved">else</span><span class="plain"> {</span>
<span class="functiontext">Errors::in_text_file</span><span class="plain">(</span>
<span class="string">"the expression to match must be in slashes '/'"</span><span class="plain">, </span><span class="identifier">tfp</span><span class="plain">);</span>
<span class="plain">}</span>
<span class="reserved">if</span><span class="plain"> (</span><span class="identifier">x</span><span class="plain"> &gt;= </span><span class="constant">MAX_ILDF_REGEXP_LENGTH</span><span class="plain">)</span>
<span class="functiontext">Errors::in_text_file</span><span class="plain">(</span>
<span class="string">"the expression to match is too long"</span><span class="plain">, </span><span class="identifier">tfp</span><span class="plain">);</span>
<span class="plain">}</span>
<span class="plain">}</span>
<span class="reserved">int</span><span class="plain"> </span><span class="functiontext">Languages::add_to_regexp</span><span class="plain">(</span><span class="identifier">wchar_t</span><span class="plain"> *</span><span class="identifier">write_to</span><span class="plain">, </span><span class="reserved">int</span><span class="plain"> </span><span class="identifier">i</span><span class="plain">, </span><span class="identifier">wchar_t</span><span class="plain"> </span><span class="identifier">c</span><span class="plain">) {</span>
<span class="reserved">if</span><span class="plain"> (</span><span class="identifier">i</span><span class="plain"> &lt; </span><span class="constant">MAX_ILDF_REGEXP_LENGTH</span><span class="plain">) </span><span class="identifier">write_to</span><span class="plain">[</span><span class="identifier">i</span><span class="plain">++] = </span><span class="identifier">c</span><span class="plain">;</span>
<span class="reserved">return</span><span class="plain"> </span><span class="identifier">i</span><span class="plain">;</span>
<span class="plain">}</span>
<span class="reserved">int</span><span class="plain"> </span><span class="functiontext">Languages::add_escape_to_regexp</span><span class="plain">(</span><span class="identifier">wchar_t</span><span class="plain"> *</span><span class="identifier">write_to</span><span class="plain">, </span><span class="reserved">int</span><span class="plain"> </span><span class="identifier">i</span><span class="plain">, </span><span class="identifier">wchar_t</span><span class="plain"> </span><span class="identifier">c</span><span class="plain">) {</span>
<span class="identifier">i</span><span class="plain"> = </span><span class="functiontext">Languages::add_to_regexp</span><span class="plain">(</span><span class="identifier">write_to</span><span class="plain">, </span><span class="identifier">i</span><span class="plain">, </span><span class="character">'%'</span><span class="plain">);</span>
<span class="identifier">i</span><span class="plain"> = </span><span class="functiontext">Languages::add_to_regexp</span><span class="plain">(</span><span class="identifier">write_to</span><span class="plain">, </span><span class="identifier">i</span><span class="plain">, </span><span class="identifier">c</span><span class="plain">);</span>
<span class="reserved">return</span><span class="plain"> </span><span class="identifier">i</span><span class="plain">;</span>
<span class="plain">}</span>
</pre>
<p class="inwebparagraph"></p>
<p class="endnote">The function Languages::regexp is used in <a href="#SP12_1">&#167;12.1</a>.</p>
<p class="endnote">The function Languages::add_to_regexp appears nowhere else.</p>
<p class="endnote">The function Languages::add_escape_to_regexp appears nowhere else.</p>
<hr class="tocbar">
<ul class="toc"><li><i>(This section begins Chapter 4: Languages.)</i></li><li><a href="4-lm.html">Continue with 'Language Methods'</a></li></ul><hr class="tocbar">
<!--End of weave-->

View file

@ -378,7 +378,10 @@ rule across the whole snippet before moving on to the next.
<pre class="display">
<span class="reserved">int</span><span class="plain"> </span><span class="functiontext">Painter::satisfies</span><span class="plain">(</span><span class="reserved">hash_table</span><span class="plain"> *</span><span class="identifier">HT</span><span class="plain">, </span><span class="reserved">colouring_rule</span><span class="plain"> *</span><span class="identifier">rule</span><span class="plain">, </span><span class="reserved">text_stream</span><span class="plain"> *</span><span class="identifier">matter</span><span class="plain">,</span>
<span class="reserved">text_stream</span><span class="plain"> *</span><span class="identifier">colouring</span><span class="plain">, </span><span class="reserved">int</span><span class="plain"> </span><span class="identifier">from</span><span class="plain">, </span><span class="reserved">int</span><span class="plain"> </span><span class="identifier">to</span><span class="plain">) {</span>
<span class="reserved">if</span><span class="plain"> (</span><span class="functiontext">Str::len</span><span class="plain">(</span><span class="identifier">rule</span><span class="plain">-&gt;</span><span class="element">match_text</span><span class="plain">) &gt; </span><span class="constant">0</span><span class="plain">) {</span>
<span class="reserved">if</span><span class="plain"> (</span><span class="identifier">rule</span><span class="plain">-&gt;</span><span class="identifier">match_regexp_text</span><span class="plain">[0]) {</span>
<span class="reserved">if</span><span class="plain"> (</span><span class="functiontext">Regexp::match</span><span class="plain">(&amp;(</span><span class="identifier">rule</span><span class="plain">-&gt;</span><span class="element">mr</span><span class="plain">), </span><span class="identifier">matter</span><span class="plain">, </span><span class="identifier">rule</span><span class="plain">-&gt;</span><span class="element">match_regexp_text</span><span class="plain">) == </span><span class="constant">FALSE</span><span class="plain">)</span>
<span class="reserved">return</span><span class="plain"> </span><span class="constant">FALSE</span><span class="plain">;</span>
<span class="plain">} </span><span class="reserved">else</span><span class="plain"> </span><span class="reserved">if</span><span class="plain"> (</span><span class="functiontext">Str::len</span><span class="plain">(</span><span class="identifier">rule</span><span class="plain">-&gt;</span><span class="element">match_text</span><span class="plain">) &gt; </span><span class="constant">0</span><span class="plain">) {</span>
<span class="reserved">if</span><span class="plain"> ((</span><span class="identifier">rule</span><span class="plain">-&gt;</span><span class="identifier">match_prefix</span><span class="plain"> == </span><span class="constant">UNSPACED_RULE_PREFIX</span><span class="plain">) ||</span>
<span class="plain">(</span><span class="identifier">rule</span><span class="plain">-&gt;</span><span class="element">match_prefix</span><span class="plain"> == </span><span class="constant">SPACED_RULE_PREFIX</span><span class="plain">) ||</span>
<span class="plain">(</span><span class="identifier">rule</span><span class="plain">-&gt;</span><span class="element">match_prefix</span><span class="plain"> == </span><span class="constant">OPTIONALLY_SPACED_RULE_PREFIX</span><span class="plain">)) {</span>

View file

@ -1,7 +1,7 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<title>M/awwp</title>
<title>Booklet Title</title>
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<meta http-equiv="Content-Language" content="en-gb">
@ -26,7 +26,7 @@
<!--Weave of 'M/spl' generated by 7-->
<ul class="crumbs"><li><a href="../webs.html">Source</a></li><li><a href="index.html">inweb</a></li><li><a href="index.html#M">Manual</a></li><li><b>Supporting Programming Languages</b></li></ul><p class="purpose">How to work with a programming language not yet supported by Inweb.</p>
<ul class="toc"><li><a href="#SP1">&#167;1. Introduction</a></li><li><a href="#SP4">&#167;4. Structure of language definitions</a></li><li><a href="#SP5">&#167;5. Properties</a></li><li><a href="#SP16">&#167;16. Secret Features</a></li><li><a href="#SP17">&#167;17. Keywords</a></li><li><a href="#SP18">&#167;18. Syntax colouring program</a></li><li><a href="#SP26">&#167;26. Example</a></li></ul><hr class="tocbar">
<ul class="toc"><li><a href="#SP1">&#167;1. Introduction</a></li><li><a href="#SP4">&#167;4. Structure of language definitions</a></li><li><a href="#SP5">&#167;5. Properties</a></li><li><a href="#SP16">&#167;16. Secret Features</a></li><li><a href="#SP17">&#167;17. Keywords</a></li><li><a href="#SP18">&#167;18. Syntax colouring program</a></li><li><a href="#SP22">&#167;22. The six splits</a></li><li><a href="#SP23">&#167;23. The seven ways rules can apply</a></li><li><a href="#SP25">&#167;25. The three ways rules can take effect</a></li><li><a href="#SP26">&#167;26. The worm, Ouroboros</a></li></ul><hr class="tocbar">
<p class="inwebparagraph"><a id="SP1"></a><b>&#167;1. Introduction. </b>To a very large extent, Inweb works the same way regardless of what language
its webs are using, and that is deliberate. On the other hand, when a web
@ -96,7 +96,9 @@ This section of the manual is about how to do it.
<p class="inwebparagraph">Once you have written a definition, use <code class="display"><span class="extract">-read-language L</span></code> at the command
line, where <code class="display"><span class="extract">L</span></code> is the file defining it. If you have many custom languages,
<code class="display"><span class="extract">-read-languages D</span></code> reads all of the definitions in a directory <code class="display"><span class="extract">D</span></code>.
<code class="display"><span class="extract">-read-languages D</span></code> reads all of the definitions in a directory <code class="display"><span class="extract">D</span></code>. Or, if
the language in question is really quite specific to a single web, you can
make a <code class="display"><span class="extract">Private Languages</span></code> subdirectory of the web and put it in there.
</p>
<p class="inwebparagraph"><a id="SP4"></a><b>&#167;4. Structure of language definitions. </b>Each language is defined by a single ILDF file. ("Inweb Language Definition
@ -111,7 +113,7 @@ so are comments, which are lines beginning with a <code class="display"><span cl
<p class="inwebparagraph">The ILD contains three sorts of thing:
</p>
<ul class="items"><li>(a) Properties, set by lines in the form <code class="display"><span class="extract">Name: C++</span></code>.
<ul class="items"><li>(a) Properties, set by lines in the form <code class="display"><span class="extract">Name: "C++"</span></code>.
</li><li>(b) Keywords, set by lines in the form <code class="display"><span class="extract">keyword int</span></code>.
</li><li>(c) A colouring program, introduced by <code class="display"><span class="extract">colouring {</span></code> and continuing until the
last block of it is closed with a <code class="display"><span class="extract">}</span></code>.
@ -122,9 +124,9 @@ practice, though, every ILD should open like so:
<pre class="display">
<span class="identifier">Name</span><span class="plain">: </span><span class="identifier">C</span>
<span class="identifier">Details</span><span class="plain">: </span><span class="identifier">The</span><span class="plain"> </span><span class="identifier">C</span><span class="plain"> </span><span class="identifier">programming</span><span class="plain"> </span><span class="identifier">language</span>
<span class="identifier">Extension</span><span class="plain">: .</span><span class="identifier">c</span>
<span class="plain">Name: </span><span class="string">"C"</span>
<span class="plain">Details: </span><span class="string">"The C programming language"</span>
<span class="plain">Extension: </span><span class="string">".c"</span>
</pre>
<p class="inwebparagraph"></p>
@ -133,7 +135,7 @@ practice, though, every ILD should open like so:
with the semi-compulsory ones.
</p>
<p class="inwebparagraph"><code class="display"><span class="extract">Name</span></code>. This is the one used by webs in their <code class="display"><span class="extract">Language: X</span></code> lines, and should
<p class="inwebparagraph"><code class="display"><span class="extract">Name</span></code>. This is the one used by webs in their <code class="display"><span class="extract">Language: "X"</span></code> lines, and should
match the ILD's own filename: wherever it is stored, the ILD for language <code class="display"><span class="extract">X</span></code>
should be filenamed <code class="display"><span class="extract">X.ildf</span></code>.
</p>
@ -169,9 +171,9 @@ as a pair or not at all, is the notation for multiline comments.
<pre class="display">
<span class="identifier">Multiline</span><span class="plain"> </span><span class="identifier">Comment</span><span class="plain"> </span><span class="identifier">Open</span><span class="plain">: /*</span>
<span class="identifier">Multiline</span><span class="plain"> </span><span class="identifier">Comment</span><span class="plain"> </span><span class="identifier">Close</span><span class="plain">: */</span>
<span class="identifier">Line</span><span class="plain"> </span><span class="identifier">Comment</span><span class="plain">: //</span>
<span class="plain">Multiline Comment Open: </span><span class="string">"/*"</span>
<span class="plain">Multiline Comment Close: </span><span class="string">"*/"</span>
<span class="plain">Line Comment: </span><span class="string">"//"</span>
</pre>
<p class="inwebparagraph"></p>
@ -196,10 +198,10 @@ character literals.
<pre class="display">
<span class="identifier">String</span><span class="plain"> </span><span class="identifier">Literal</span><span class="plain">: </span><span class="string">"\""</span>
<span class="identifier">String</span><span class="plain"> </span><span class="identifier">Literal</span><span class="plain"> </span><span class="identifier">Escape</span><span class="plain">: \</span>
<span class="identifier">Character</span><span class="plain"> </span><span class="identifier">Literal</span><span class="plain">: '</span>
<span class="identifier">Character</span><span class="plain"> </span><span class="identifier">Literal</span><span class="plain"> </span><span class="identifier">Escape</span><span class="plain">: \</span>
<span class="plain">String Literal: </span><span class="string">"\""</span>
<span class="plain">String Literal Escape: </span><span class="string">"\\"</span>
<span class="plain">Character Literal: </span><span class="string">"'"</span>
<span class="plain">Character Literal Escape: </span><span class="string">"\\"</span>
</pre>
<p class="inwebparagraph"></p>
@ -220,9 +222,9 @@ are notations for non-decimal numbers, if they exist.
<pre class="display">
<span class="identifier">Hexadecimal</span><span class="plain"> </span><span class="identifier">Literal</span><span class="plain"> </span><span class="identifier">Prefix</span><span class="plain">: </span><span class="constant">0</span><span class="identifier">x</span>
<span class="identifier">Binary</span><span class="plain"> </span><span class="identifier">Literal</span><span class="plain"> </span><span class="identifier">Prefix</span><span class="plain">: </span><span class="constant">0</span><span class="identifier">b</span>
<span class="identifier">Negative</span><span class="plain"> </span><span class="identifier">Literal</span><span class="plain"> </span><span class="identifier">Prefix</span><span class="plain">: -</span>
<span class="plain">Hexadecimal Literal Prefix: </span><span class="string">"0x"</span>
<span class="plain">Binary Literal Prefix: </span><span class="string">"0b"</span>
<span class="plain">Negative Literal Prefix: </span><span class="string">"-"</span>
</pre>
<p class="inwebparagraph"></p>
@ -235,7 +237,7 @@ For example, Perl defines:
<pre class="display">
<span class="identifier">Shebang</span><span class="plain">: #!/</span><span class="identifier">usr</span><span class="plain">/</span><span class="identifier">bin</span><span class="plain">/</span><span class="identifier">perl</span><span class="plain">\</span><span class="identifier">n</span><span class="plain">\</span><span class="identifier">n</span>
<span class="plain">Shebang: </span><span class="string">"#!/usr/bin/perl\n\n"</span>
</pre>
<p class="inwebparagraph">Most languages do not have a shebang.
@ -255,7 +257,7 @@ that this language does, and gives the notation. For example, C provides:
<pre class="display">
<span class="identifier">Line</span><span class="plain"> </span><span class="identifier">Marker</span><span class="plain">: </span><span class="string">"#line %d \"%f\"\n"</span>
<span class="plain">Line Marker: </span><span class="string">"#line %d \"%f\"\n"</span>
</pre>
<p class="inwebparagraph">Here <code class="display"><span class="extract">%d</span></code> expands to the line number, and <code class="display"><span class="extract">%f</span></code> the filename, of origin.
@ -272,8 +274,8 @@ matter added. This material is in <code class="display"><span class="extract">Be
<pre class="display">
<span class="identifier">Before</span><span class="plain"> </span><span class="identifier">Named</span><span class="plain"> </span><span class="identifier">Paragraph</span><span class="plain"> </span><span class="identifier">Expansion</span><span class="plain">: \</span><span class="identifier">n</span><span class="reserved">{</span><span class="plain">\</span><span class="identifier">n</span>
<span class="identifier">After</span><span class="plain"> </span><span class="identifier">Named</span><span class="plain"> </span><span class="identifier">Paragraph</span><span class="plain"> </span><span class="identifier">Expansion</span><span class="plain">: </span><span class="reserved">}</span><span class="plain">\</span><span class="identifier">n</span>
<span class="plain">Before Named Paragraph Expansion: </span><span class="string">"\n{\n"</span>
<span class="plain">After Named Paragraph Expansion: </span><span class="string">"}\n"</span>
</pre>
<p class="inwebparagraph">The effect of this is to ensure that code such as:
@ -317,8 +319,8 @@ if given, places any ending notation. For example, Inform 6 defines:
<pre class="display">
<span class="identifier">Start</span><span class="plain"> </span><span class="identifier">Definition</span><span class="plain">: </span><span class="identifier">Constant</span><span class="plain"> %</span><span class="identifier">S</span><span class="plain"> =\</span><span class="identifier">s</span>
<span class="identifier">End</span><span class="plain"> </span><span class="identifier">Definition</span><span class="plain">: ;\</span><span class="identifier">n</span>
<span class="plain">Start Definition: </span><span class="string">"Constant %S =\s"</span>
<span class="plain">End Definition: </span><span class="string">";\n"</span>
</pre>
<p class="inwebparagraph">where <code class="display"><span class="extract">%S</span></code> expands to the name of the term to be defined. Thus, we might tangle
@ -327,7 +329,7 @@ out to:
<pre class="display">
<span class="identifier">Constant</span><span class="plain"> </span><span class="identifier">TAXICAB</span><span class="plain"> = </span><span class="constant">1729</span><span class="plain">;\</span><span class="identifier">n</span>
<span class="plain">Constant TAXICAB = 1729;\n</span>
</pre>
<p class="inwebparagraph">Inweb ignores all definitions unless one of these three properties is given.
@ -342,10 +344,10 @@ and in <code class="display"><span class="extract">Start Ifndef</span></code> an
<pre class="display">
<span class="identifier">Start</span><span class="plain"> </span><span class="identifier">Ifdef</span><span class="plain">: #</span><span class="identifier">ifdef</span><span class="plain"> %</span><span class="identifier">S</span><span class="plain">;\</span><span class="identifier">n</span>
<span class="identifier">End</span><span class="plain"> </span><span class="identifier">Ifdef</span><span class="plain">: #</span><span class="identifier">endif</span><span class="plain">; ! %</span><span class="identifier">S</span><span class="plain">\</span><span class="identifier">n</span>
<span class="identifier">Start</span><span class="plain"> </span><span class="identifier">Ifndef</span><span class="plain">: #</span><span class="identifier">ifndef</span><span class="plain"> %</span><span class="identifier">S</span><span class="plain">;\</span><span class="identifier">n</span>
<span class="identifier">End</span><span class="plain"> </span><span class="identifier">Ifndef</span><span class="plain">: #</span><span class="identifier">endif</span><span class="plain">; ! %</span><span class="identifier">S</span><span class="plain">\</span><span class="identifier">n</span>
<span class="plain">Start Ifdef: </span><span class="string">"#ifdef %S;\n"</span>
<span class="plain">End Ifdef: </span><span class="string">"#endif; ! %S\n"</span>
<span class="plain">Start Ifndef: </span><span class="string">"#ifndef %S;\n"</span>
<span class="plain">End Ifndef: </span><span class="string">"#endif; ! %S\n"</span>
</pre>
<p class="inwebparagraph">which is a subtly different notation from the C one. Again, <code class="display"><span class="extract">%S</span></code> expands to
@ -393,7 +395,7 @@ in the language in question. For C, then, we include the line:
<pre class="display">
<span class="identifier">keyword</span><span class="plain"> </span><span class="identifier">void</span>
<span class="reserved">keyword</span><span class="plain"> </span><span class="string">void</span>
</pre>
<p class="inwebparagraph">Keywords can be declared in a number of categories, which are identified by
@ -403,7 +405,7 @@ for example:
<pre class="display">
<span class="identifier">keyword</span><span class="plain"> </span><span class="identifier">isdigit</span><span class="plain"> </span><span class="identifier">of</span><span class="plain"> !</span><span class="element">function</span>
<span class="reserved">keyword</span><span class="plain"> </span><span class="string">isdigit</span><span class="plain"> </span><span class="reserved">of</span><span class="plain"> </span><span class="element">!function</span>
</pre>
<p class="inwebparagraph">makes a keyword of colour <code class="display"><span class="extract">!function</span></code>.
@ -420,8 +422,8 @@ palette of possibilities:
<pre class="display">
<span class="plain">!</span><span class="element">character</span><span class="plain"> !</span><span class="element">comment</span><span class="plain"> !</span><span class="element">constant</span><span class="plain"> !</span><span class="element">definition</span><span class="plain"> !</span><span class="element">element</span><span class="plain"> !</span><span class="element">extract</span>
<span class="plain">!</span><span class="element">function</span><span class="plain"> !</span><span class="element">identifier</span><span class="plain"> !</span><span class="element">plain</span><span class="plain"> !</span><span class="element">reserved</span><span class="plain"> !</span><span class="element">string</span>
<span class="element">!character</span><span class="plain"> </span><span class="element">!comment</span><span class="plain"> </span><span class="element">!constant</span><span class="plain"> </span><span class="element">!definition</span><span class="plain"> </span><span class="element">!element</span><span class="plain"> </span><span class="element">!extract</span>
<span class="element">!function</span><span class="plain"> </span><span class="element">!identifier</span><span class="plain"> </span><span class="element">!plain</span><span class="plain"> </span><span class="element">!reserved</span><span class="plain"> </span><span class="element">!string</span>
</pre>
<p class="inwebparagraph">Each character has its own colour. At the start of the process, every
@ -454,7 +456,7 @@ empty program is legal but does nothing:
<pre class="display">
<span class="identifier">colouring</span><span class="plain"> </span><span class="reserved">{</span>
<span class="reserved">colouring</span><span class="plain"> </span><span class="reserved">{</span>
<span class="reserved">}</span>
</pre>
@ -466,68 +468,225 @@ block, that's a line of source code. Blocks normally contain one or more
<pre class="display">
<span class="identifier">colouring</span><span class="plain"> </span><span class="reserved">{</span>
<span class="identifier">marble</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> !</span><span class="element">extract</span>
<span class="reserved">colouring</span><span class="plain"> </span><span class="reserved">{</span>
<span class="string">marble</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!function</span>
<span class="reserved">}</span>
</pre>
<p class="inwebparagraph">Rules take the form of "if X, then Y", and the <code class="display"><span class="extract">=&gt;</span></code> divides the X from the Y.
This one says that if the snippet consists of the word "marble", then colour
it <code class="display"><span class="extract">!extract</span></code>. Of course this is not very useful, since it would only catch
it <code class="display"><span class="extract">!function</span></code>. Of course this is not very useful, since it would only catch
lines containing only that one word. So we really want to narrow in on smaller
snippets:
snippets. This, for example, applies its rule to each individual character
in turn:
</p>
<pre class="display">
<span class="identifier">colouring</span><span class="plain"> </span><span class="reserved">{</span>
<span class="identifier">characters</span><span class="plain"> </span><span class="reserved">{</span>
<span class="identifier">X</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> !</span><span class="element">extract</span>
<span class="reserved">colouring</span><span class="plain"> </span><span class="reserved">{</span>
<span class="reserved">characters</span><span class="plain"> </span><span class="reserved">{</span>
<span class="string">K</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!identifier</span>
<span class="reserved">}</span>
<span class="reserved">}</span>
</pre>
<p class="inwebparagraph">The effect of the <code class="display"><span class="extract">characters {</span></code> ... <code class="display"><span class="extract">}</span></code> block is to apply all its rules to
each character of the snippet owning it. Inside the block, then, the snippet
is always just a single character, and our rule tells us to paint the letter X
wherever it occurs.
<p class="inwebparagraph"></p>
<p class="inwebparagraph"><a id="SP21"></a><b>&#167;21. </b>In the above examples, <code class="display"><span class="extract">K</span></code> and <code class="display"><span class="extract">marble</span></code> appeared without quotation marks,
but they were only allowed to do that because (a) they were single words,
</p>
<p class="inwebparagraph"><a id="SP21"></a><b>&#167;21. </b>The block <code class="display"><span class="extract">instances of X</span></code> narrows in on each usage of the text <code class="display"><span class="extract">X</span></code> inside
<p class="inwebparagraph">(b) those words had no other meaning, and (c) they didn't contain any
awkward characters. For any more complicated texts, always use quotation
marks. For example, in
</p>
<pre class="display">
<span class="string">"=&gt;"</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!reserved</span>
</pre>
<p class="inwebparagraph">the <code class="display"><span class="extract">=&gt;</span></code> in quotes is just text, whereas the one outside quotes is being
used to divide a rule.
</p>
<p class="inwebparagraph">If you need a literal double quote inside the double-quotes, use <code class="display"><span class="extract">\"</span></code>; and
use <code class="display"><span class="extract">\\</span></code> for a literal backslash. For example:
</p>
<pre class="display">
<span class="string">"\\\""</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!reserved</span>
</pre>
<p class="inwebparagraph">actually matches the text <code class="display"><span class="extract">\"</span></code>.
</p>
<p class="inwebparagraph"><a id="SP22"></a><b>&#167;22. The six splits. </b><code class="display"><span class="extract">characters</span></code> is an example of a "split", which splits up the original snippet
of text &mdash; say, the line <code class="display"><span class="extract">let K = 2</span></code> &mdash; into smaller, non-overlapping snippets
&mdash; in this case, nine of them: <code class="display"><span class="extract">l</span></code>, <code class="display"><span class="extract">e</span></code>, <code class="display"><span class="extract">t</span></code>, <code class="display"><span class="extract"> </span></code>, <code class="display"><span class="extract">K</span></code>, <code class="display"><span class="extract"> </span></code>, <code class="display"><span class="extract">=</span></code>, <code class="display"><span class="extract"> </span></code>, and <code class="display"><span class="extract">2</span></code>.
Every split is followed by a block of rules, which is applied to each of the
pieces in turn. Inweb works sideways-first: thus, if the block contains rules
R1, R2, ..., then R1 is applied to each piece first, then R2 to each piece,
and so on.
</p>
<p class="inwebparagraph">There are several different ways to split, all of them written in the
plural, to emphasize that they work on what are usually multiple things.
Rules, on the other hand, are written in the singular. Splits are not allowed
to be followed by <code class="display"><span class="extract">=&gt;</span></code>: they always begin a block.
</p>
<p class="inwebparagraph">1. <code class="display"><span class="extract">characters</span></code> splits the snippet into each of its characters.
</p>
<p class="inwebparagraph">2. <code class="display"><span class="extract">characters in T</span></code> splits the snippet into each of its characters which
lie inside the text <code class="display"><span class="extract">T</span></code>. For example, here is a not very useful ILD for
plain text in which all vowels are in red:
</p>
<pre class="display">
<span class="plain">Name: </span><span class="string">"VowelsExample"</span>
<span class="reserved">colouring</span><span class="plain"> </span><span class="reserved">{</span>
<span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!plain</span>
<span class="plain"> </span><span class="reserved">characters</span><span class="plain"> </span><span class="reserved">in</span><span class="plain"> </span><span class="string">"AEIOUaeiou"</span><span class="plain"> </span><span class="reserved">{</span>
<span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!function</span>
<span class="plain"> </span><span class="reserved">}</span>
<span class="reserved">}</span>
</pre>
<p class="inwebparagraph">Given the text:
</p>
<pre class="display">
<span class="plain">A noir, E blanc, I rouge, U vert, O bleu : voyelles,</span>
<span class="plain">Je dirai quelque jour vos naissances latentes :</span>
<span class="plain">A, noir corset velu des mouches éclatantes</span>
<span class="plain">Qui bombinent autour des puanteurs cruelles,</span>
</pre>
<p class="inwebparagraph">this produces:
</p>
<pre class="display">
<span class="functiontext">A</span><span class="plain"> n</span><span class="functiontext">oi</span><span class="plain">r, </span><span class="functiontext">E</span><span class="plain"> bl</span><span class="functiontext">a</span><span class="plain">nc, </span><span class="functiontext">I</span><span class="plain"> r</span><span class="functiontext">ou</span><span class="plain">g</span><span class="functiontext">e</span><span class="plain">, </span><span class="functiontext">U</span><span class="plain"> v</span><span class="functiontext">e</span><span class="plain">rt, </span><span class="functiontext">O</span><span class="plain"> bl</span><span class="functiontext">eu</span><span class="plain"> : v</span><span class="functiontext">o</span><span class="plain">y</span><span class="functiontext">e</span><span class="plain">ll</span><span class="functiontext">e</span><span class="plain">s,</span>
<span class="plain">J</span><span class="functiontext">e</span><span class="plain"> d</span><span class="functiontext">i</span><span class="plain">r</span><span class="functiontext">ai</span><span class="plain"> q</span><span class="functiontext">ue</span><span class="plain">lq</span><span class="functiontext">ue</span><span class="plain"> j</span><span class="functiontext">ou</span><span class="plain">r v</span><span class="functiontext">o</span><span class="plain">s n</span><span class="functiontext">ai</span><span class="plain">ss</span><span class="functiontext">a</span><span class="plain">nc</span><span class="functiontext">e</span><span class="plain">s l</span><span class="functiontext">a</span><span class="plain">t</span><span class="functiontext">e</span><span class="plain">nt</span><span class="functiontext">e</span><span class="plain">s :</span>
<span class="functiontext">A</span><span class="plain">, n</span><span class="functiontext">oi</span><span class="plain">r c</span><span class="functiontext">o</span><span class="plain">rs</span><span class="functiontext">e</span><span class="plain">t v</span><span class="functiontext">e</span><span class="plain">l</span><span class="functiontext">u</span><span class="plain"> d</span><span class="functiontext">e</span><span class="plain">s m</span><span class="functiontext">ou</span><span class="plain">ch</span><span class="functiontext">e</span><span class="plain">s écl</span><span class="functiontext">a</span><span class="plain">t</span><span class="functiontext">a</span><span class="plain">nt</span><span class="functiontext">e</span><span class="plain">s</span>
<span class="plain">Q</span><span class="functiontext">ui</span><span class="plain"> b</span><span class="functiontext">o</span><span class="plain">mb</span><span class="functiontext">i</span><span class="plain">n</span><span class="functiontext">e</span><span class="plain">nt </span><span class="functiontext">au</span><span class="plain">t</span><span class="functiontext">ou</span><span class="plain">r d</span><span class="functiontext">e</span><span class="plain">s p</span><span class="functiontext">ua</span><span class="plain">nt</span><span class="functiontext">eu</span><span class="plain">rs cr</span><span class="functiontext">ue</span><span class="plain">ll</span><span class="functiontext">e</span><span class="plain">s,</span>
</pre>
<p class="inwebparagraph">3. The split <code class="display"><span class="extract">instances of X</span></code> narrows in on each usage of the text <code class="display"><span class="extract">X</span></code> inside
the snippet. For example,
</p>
<pre class="display">
<span class="identifier">colouring</span><span class="plain"> </span><span class="reserved">{</span>
<span class="identifier">instances</span><span class="plain"> </span><span class="identifier">of</span><span class="plain"> == </span><span class="reserved">{</span>
<span class="reserved">=&gt;</span><span class="plain"> !</span><span class="element">reserved</span>
<span class="reserved">}</span>
<span class="reserved">}</span>
<span class="plain">Name: </span><span class="string">"LineageExample"</span>
<span class="reserved">colouring</span><span class="plain"> </span><span class="reserved">{</span>
<span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!plain</span>
<span class="plain"> </span><span class="reserved">instances</span><span class="plain"> </span><span class="reserved">of</span><span class="plain"> </span><span class="string">"son"</span><span class="plain"> </span><span class="reserved">{</span>
<span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!function</span>
<span class="plain"> </span><span class="reserved">}</span>
<span class="reserved">}</span>
</pre>
<p class="inwebparagraph">gives every usage of <code class="display"><span class="extract">==</span></code> the colour <code class="display"><span class="extract">!reserved</span></code>. Note that it never runs in
an overlapping way: the snippet <code class="display"><span class="extract">===</span></code> would be considered as having only one
instance of <code class="display"><span class="extract">==</span></code> (the first two characters), while <code class="display"><span class="extract">====</span></code> would have two.
</p>
<p class="inwebparagraph"><a id="SP22"></a><b>&#167;22. </b>Another kind of block is <code class="display"><span class="extract">runs of C</span></code>, where <code class="display"><span class="extract">C</span></code> is a colour. For example:
<p class="inwebparagraph">acts on the text:
</p>
<pre class="display">
<span class="identifier">colouring</span><span class="plain"> </span><span class="reserved">{</span>
<span class="identifier">runs</span><span class="plain"> </span><span class="identifier">of</span><span class="plain"> !</span><span class="element">identifier</span><span class="plain"> </span><span class="reserved">{</span>
<span class="identifier">printf</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> !</span><span class="element">function</span>
<span class="identifier">sscanf</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> !</span><span class="element">function</span>
<span class="reserved">}</span>
<span class="reserved">}</span>
<span class="plain">Jacob first appears in the Book of Genesis, the son of Isaac and Rebecca, the</span>
<span class="plain">grandson of Abraham, Sarah and Bethuel, the nephew of Ishmael.</span>
</pre>
<p class="inwebparagraph">If this runs on the line <code class="display"><span class="extract">if (x == 1) printf("Hello!");</span></code>, then the inner
block will run three times: its snippet will be <code class="display"><span class="extract">if</span></code>, then <code class="display"><span class="extract">x</span></code>, then <code class="display"><span class="extract">printf</span></code>.
The rules inside the block will take effect only on the third time, when it
will paint the word <code class="display"><span class="extract">printf</span></code> in <code class="display"><span class="extract">!function</span></code> colour.
<p class="inwebparagraph">to produce:
</p>
<pre class="display">
<span class="plain">Jacob first appears in the Book of Genesis, the </span><span class="functiontext">son</span><span class="plain"> of Isaac and Rebecca, the</span>
<span class="plain">grand</span><span class="functiontext">son</span><span class="plain"> of Abraham, Sarah and Bethuel, the nephew of Ishmael.</span>
</pre>
<p class="inwebparagraph">Note that this split never matches in an overlapping way: when looking for
instances of <code class="display"><span class="extract">==</span></code>, the snippet <code class="display"><span class="extract">===</span></code> would be considered as having only one
instance (the first two characters), while <code class="display"><span class="extract">====</span></code> would have two.
</p>
<p class="inwebparagraph">4. The split <code class="display"><span class="extract">runs of C</span></code>, where <code class="display"><span class="extract">C</span></code> describes a colour, splits the snippet
into non-overlapping contiguous pieces which have that colour. For example:
</p>
<pre class="display">
<span class="plain">Name: </span><span class="string">"RunningExample"</span>
<span class="reserved">colouring</span><span class="plain"> </span><span class="reserved">{</span>
<span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!plain</span>
<span class="plain"> </span><span class="reserved">characters</span><span class="plain"> </span><span class="reserved">in</span><span class="plain"> </span><span class="string">"0123456789"</span><span class="plain"> </span><span class="reserved">{</span>
<span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!function</span>
<span class="plain"> </span><span class="reserved">}</span>
<span class="plain"> </span><span class="reserved">runs</span><span class="plain"> </span><span class="reserved">of</span><span class="plain"> </span><span class="element">!plain</span><span class="plain"> </span><span class="reserved">{</span>
<span class="plain"> </span><span class="string">"-"</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!function</span>
<span class="plain"> </span><span class="reserved">}</span>
<span class="reserved">}</span>
</pre>
<p class="inwebparagraph">acts on:
</p>
<pre class="display">
<span class="plain">Napoleon Bonaparte (1769-1821) took 167 scientists to Egypt in 1798,</span>
<span class="plain">who published their so-called Memoirs over the period 1798-1801.</span>
</pre>
<p class="inwebparagraph">to produce:
</p>
<pre class="display">
<span class="plain">Napoleon Bonaparte (</span><span class="functiontext">1769-1821</span><span class="plain">) took </span><span class="functiontext">167</span><span class="plain"> scientists to Egypt in </span><span class="functiontext">1798</span><span class="plain">,</span>
<span class="plain">who published their so-called Memoirs over the period </span><span class="functiontext">1798-1801</span><span class="plain">.</span>
</pre>
<p class="inwebparagraph">Here the hyphens in number ranges have been coloured, but not the hyphen
in "so-called".
</p>
<p class="inwebparagraph">A more computer-science sort of example would be:
</p>
<pre class="display">
<span class="plain">Name: </span><span class="string">"StdioExample"</span>
<span class="reserved">colouring</span><span class="plain"> </span><span class="reserved">{</span>
<span class="plain"> </span><span class="reserved">runs</span><span class="plain"> </span><span class="reserved">of</span><span class="plain"> </span><span class="element">!identifier</span><span class="plain"> </span><span class="reserved">{</span>
<span class="plain"> </span><span class="string">printf</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!function</span>
<span class="plain"> </span><span class="string">sscanf</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!function</span>
<span class="plain"> </span><span class="reserved">}</span>
<span class="reserved">}</span>
</pre>
<p class="inwebparagraph">which acts on:
</p>
<pre class="display">
<span class="plain">if (x == 1) printf("Hello!");</span>
</pre>
<p class="inwebparagraph">to produce:
</p>
<pre class="display">
<span class="identifier">if</span><span class="plain"> (</span><span class="identifier">x</span><span class="plain"> == </span><span class="constant">1</span><span class="plain">) </span><span class="functiontext">printf</span><span class="plain">("</span><span class="identifier">Hello</span><span class="plain">!");</span>
</pre>
<p class="inwebparagraph">The split divides the line up into three runs, and the inner block runs three
times: on <code class="display"><span class="extract">if</span></code>, then <code class="display"><span class="extract">x</span></code>, then <code class="display"><span class="extract">printf</span></code>. Only the third time has any effect.
</p>
<p class="inwebparagraph">As a special form, <code class="display"><span class="extract">runs of unquoted</span></code> means "runs of characters not painted
@ -535,30 +694,124 @@ either with <code class="display"><span class="extract">!string</span></code> or
not a colour.
</p>
<p class="inwebparagraph"><a id="SP23"></a><b>&#167;23. </b>It remains to specify what rules can do. As noted, they take the form
"if X, then Y". The following are the possibilities for X, the condition:
<p class="inwebparagraph">5. The split <code class="display"><span class="extract">matches of /E/</span></code>, where <code class="display"><span class="extract">/E/</span></code> is a regular expression (see below),
splits the snippet up into non-overlapping pieces which match it: possibly
none at all, of course, in which case the block of rules is never used.
This is easier to demonstrate than explain:
</p>
<p class="inwebparagraph">1. X can be omitted altogether, and then the rule always applies. For example,
this somewhat nihilistic program gets rid of colouring entirely:
<pre class="display">
<span class="plain">Name: </span><span class="string">"AssemblageExample"</span>
<span class="reserved">colouring</span><span class="plain"> </span><span class="reserved">{</span>
<span class="plain"> </span><span class="reserved">matches</span><span class="plain"> </span><span class="reserved">of</span><span class="plain"> </span><span class="functiontext">/\.[A-Za-z_][A-Za-z_0-9]*/</span><span class="plain"> </span><span class="reserved">{</span>
<span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!function</span>
<span class="plain"> </span><span class="reserved">}</span>
<span class="reserved">}</span>
</pre>
<p class="inwebparagraph">which acts on:
</p>
<pre class="display">
<span class="identifier">colouring</span><span class="plain"> </span><span class="reserved">{</span>
<span class="reserved">=&gt;</span><span class="plain"> !</span><span class="element">plain</span>
<span class="plain">JSR .initialise</span>
<span class="plain">LDR A, #.data</span>
<span class="plain">RTS</span>
<span class="plain">.initialise</span>
<span class="plain">TAX</span>
</pre>
<p class="inwebparagraph">to produce:
</p>
<pre class="display">
<span class="identifier">JSR</span><span class="plain"> </span><span class="functiontext">.initialise</span>
<span class="identifier">LDR</span><span class="plain"> </span><span class="identifier">A</span><span class="plain">, #</span><span class="functiontext">.data</span>
<span class="identifier">RTS</span>
<span class="functiontext">.initialise</span>
<span class="identifier">TAX</span>
</pre>
<p class="inwebparagraph">6. Lastly, the split <code class="display"><span class="extract">brackets in /E/</span></code> matches the snippet against the
regular expression <code class="display"><span class="extract">E</span></code>, and then runs the rules on each bracketed
subexpression in turn. (If there is no match, or there are no bracketed
terms in <code class="display"><span class="extract">E</span></code>, nothing happens.)
</p>
<pre class="display">
<span class="plain">Name: </span><span class="string">"EquationsExample"</span>
<span class="reserved">colouring</span><span class="plain"> </span><span class="reserved">{</span>
<span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!plain</span>
<span class="plain"> </span><span class="reserved">brackets</span><span class="plain"> </span><span class="reserved">in</span><span class="plain"> </span><span class="functiontext">/.*?([A-Z])\s*=\s*(\d+).*/</span><span class="plain"> </span><span class="reserved">{</span>
<span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!function</span>
<span class="plain"> </span><span class="reserved">}</span>
<span class="reserved">}</span>
</pre>
<p class="inwebparagraph">acts on:
</p>
<pre class="display">
<span class="plain">A = 2716</span>
<span class="plain">B=3</span>
<span class="plain">C =715 + B</span>
<span class="plain">D &lt; 14</span>
</pre>
<p class="inwebparagraph">to produce:
</p>
<pre class="display">
<span class="functiontext">A</span><span class="plain"> = </span><span class="functiontext">2716</span>
<span class="functiontext">B</span><span class="plain">=</span><span class="functiontext">3</span>
<span class="functiontext">C</span><span class="plain"> =</span><span class="functiontext">715</span><span class="plain"> + B</span>
<span class="plain">D &lt; 14</span>
</pre>
<p class="inwebparagraph">What happens here is that the expression has two bracketed terms, one for
the letter, one for the number; the rule is run first on the letter, then
on the number, and both are turned to <code class="display"><span class="extract">!function</span></code>.
</p>
<p class="inwebparagraph"><a id="SP23"></a><b>&#167;23. The seven ways rules can apply. </b>Rules are the lines with a <code class="display"><span class="extract">=&gt;</span></code> in. As noted, they take the form "if X, then
Y". The following are the possibilities for X, the condition.
</p>
<p class="inwebparagraph">1. The easiest thing is to give nothing at all, and then the rule always
applies. For example, this somewhat nihilistic program gets rid of colouring
entirely:
</p>
<pre class="display">
<span class="reserved">colouring</span><span class="plain"> </span><span class="reserved">{</span>
<span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!plain</span>
<span class="reserved">}</span>
</pre>
<p class="inwebparagraph">2. X can require the whole snippet to be of a particular colour, by writing
<code class="display"><span class="extract">colour C</span></code>. For example:
<p class="inwebparagraph">2. If X is a piece of literal text, the rule applies when the snippet is
exactly that text. For example,
</p>
<pre class="display">
<span class="identifier">colouring</span><span class="plain"> </span><span class="reserved">{</span>
<span class="identifier">characters</span><span class="plain"> </span><span class="reserved">{</span>
<span class="identifier">colour</span><span class="plain"> !</span><span class="element">character</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> !</span><span class="element">plain</span>
<span class="string">printf</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!function</span>
</pre>
<p class="inwebparagraph">3. X can require the whole snippet to be of a particular colour, by writing
<code class="display"><span class="extract">coloured C</span></code>. For example:
</p>
<pre class="display">
<span class="reserved">colouring</span><span class="plain"> </span><span class="reserved">{</span>
<span class="reserved">characters</span><span class="plain"> </span><span class="reserved">{</span>
<span class="reserved">coloured</span><span class="plain"> </span><span class="element">!character</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!plain</span>
<span class="reserved">}</span>
<span class="reserved">}</span>
</pre>
@ -566,21 +819,23 @@ this somewhat nihilistic program gets rid of colouring entirely:
<p class="inwebparagraph">removes the syntax colouring on character literals.
</p>
<p class="inwebparagraph">3. X can require the snippet to be one of the language's known keywords, as
<p class="inwebparagraph">4. X can require the snippet to be one of the language's known keywords, as
declared earlier in the ILD by a <code class="display"><span class="extract">keyword</span></code> command. The syntax here is
<code class="display"><span class="extract">keyword of C</span></code>, where <code class="display"><span class="extract">C</span></code> is a colour. For example:
</p>
<pre class="display">
<span class="identifier">keyword</span><span class="plain"> </span><span class="identifier">of</span><span class="plain"> !</span><span class="element">element</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> !</span><span class="element">element</span>
<span class="reserved">keyword</span><span class="plain"> </span><span class="reserved">of</span><span class="plain"> </span><span class="element">!element</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!element</span>
</pre>
<p class="inwebparagraph">says: if the snippet is a keyword declared as being of colour <code class="display"><span class="extract">!element</span></code>,
then actually colour it that way.
then actually colour it that way. (This is much faster than making many
comparison rules in a row, one for each keyword in the language; Inweb has
put all of the registered keywords into a hash table for rapid lookup.)
</p>
<p class="inwebparagraph">4. X can look at a little context before or after the snippet, testing it
<p class="inwebparagraph">5. X can look at a little context before or after the snippet, testing it
with one of the following: <code class="display"><span class="extract">prefix P</span></code>, <code class="display"><span class="extract">spaced prefix P</span></code>,
<code class="display"><span class="extract">optionally spaced prefix P</span></code>. These qualifiers have to do with whether white
space must appear after <code class="display"><span class="extract">P</span></code> and before the snippet. For example,
@ -588,8 +843,8 @@ space must appear after <code class="display"><span class="extract">P</span></co
<pre class="display">
<span class="identifier">runs</span><span class="plain"> </span><span class="identifier">of</span><span class="plain"> !</span><span class="element">identifier</span><span class="plain"> </span><span class="reserved">{</span>
<span class="identifier">prefix</span><span class="plain"> </span><span class="identifier">optionally</span><span class="plain"> </span><span class="identifier">spaced</span><span class="plain"> -&gt; </span><span class="reserved">=&gt;</span><span class="plain"> !</span><span class="element">element</span>
<span class="reserved">runs</span><span class="plain"> </span><span class="reserved">of</span><span class="plain"> </span><span class="element">!identifier</span><span class="plain"> </span><span class="reserved">{</span>
<span class="reserved">prefix</span><span class="plain"> </span><span class="reserved">optionally</span><span class="plain"> </span><span class="reserved">spaced</span><span class="plain"> -&gt; </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!element</span>
<span class="reserved">}</span>
</pre>
@ -597,18 +852,103 @@ space must appear after <code class="display"><span class="extract">P</span></co
as <code class="display"><span class="extract">!element</span></code>. Similarly for <code class="display"><span class="extract">suffix</span></code>.
</p>
<p class="inwebparagraph">5. And otherwise X is literal text, and the rule applies if and only if
the snippet is exactly that text. For example,
<p class="inwebparagraph">6. X can test the snippet against a regular expression, with <code class="display"><span class="extract">matching /E/</span></code>.
For example:
</p>
<pre class="display">
<span class="identifier">printf</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> !</span><span class="element">function</span>
<span class="reserved">runs</span><span class="plain"> </span><span class="reserved">of</span><span class="plain"> </span><span class="element">!identifier</span><span class="plain"> </span><span class="reserved">{</span>
<span class="reserved">matching</span><span class="plain"> </span><span class="functiontext">/.*x.*/</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!element</span>
<span class="reserved">}</span>
</pre>
<p class="inwebparagraph">...turns any identifier containing a lower-case <code class="display"><span class="extract">x</span></code> into <code class="display"><span class="extract">!element</span></code> colour.
Note that <code class="display"><span class="extract">matching /x/</span></code> would not have worked, because our regular expression
is required to match the entire snippet, not just somewhere inside.
</p>
<pre class="display">
<span class="reserved">characters</span><span class="plain"> </span><span class="reserved">in</span><span class="plain"> </span><span class="string">"0123456789"</span><span class="plain"> </span><span class="reserved">{</span>
<span class="reserved">matching</span><span class="plain"> </span><span class="functiontext">/\d\d\d\d/</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!element</span>
<span class="reserved">}</span>
</pre>
<p class="inwebparagraph">...colours all four-digit numbers, but no others.
</p>
<p class="inwebparagraph">7. Whenever a split takes place, Inweb keeps count of how many pieces there are,
and different rules can apply to differently numbered pieces. The notation
is <code class="display"><span class="extract">number N</span></code>, where <code class="display"><span class="extract">N</span></code> is the number, counting from 1. For example,
</p>
<pre class="display">
<span class="plain">Name: </span><span class="string">"ThirdExample"</span>
<span class="reserved">colouring</span><span class="plain"> </span><span class="reserved">{</span>
<span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!plain</span>
<span class="plain"> </span><span class="reserved">matches</span><span class="plain"> </span><span class="reserved">of</span><span class="plain"> </span><span class="functiontext">/\S+/</span><span class="plain"> </span><span class="reserved">{</span>
<span class="plain"> </span><span class="string">number</span><span class="plain"> </span><span class="string">3</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!function</span>
<span class="plain"> </span><span class="reserved">}</span>
<span class="reserved">}</span>
</pre>
<p class="inwebparagraph">acts on:
</p>
<pre class="display">
<span class="plain">With how sad steps, O Moon, thou climb'st the skies!</span>
<span class="plain">How silently, and with how wan a face!</span>
<span class="plain">What, may it be that even in heav'nly place</span>
<span class="plain">That busy archer his sharp arrows tries!</span>
<span class="plain">Sure, if that long-with love-acquainted eyes</span>
<span class="plain">Can judge of love, thou feel'st a lover's case,</span>
<span class="plain">I read it in thy looks; thy languish'd grace</span>
<span class="plain">To me, that feel the like, thy state descries.</span>
<span class="plain">Then, ev'n of fellowship, O Moon, tell me,</span>
<span class="plain">Is constant love deem'd there but want of wit?</span>
<span class="plain">Are beauties there as proud as here they be?</span>
<span class="plain">Do they above love to be lov'd, and yet</span>
<span class="plain">Those lovers scorn whom that love doth possess?</span>
<span class="plain">Do they call virtue there ungratefulness?</span>
</pre>
<p class="inwebparagraph">to produce:
</p>
<pre class="display">
<span class="plain">With how </span><span class="functiontext">sad</span><span class="plain"> steps, O Moon, thou climb'st the skies!</span>
<span class="plain">How silently, </span><span class="functiontext">and</span><span class="plain"> with how wan a face!</span>
<span class="plain">What, may </span><span class="functiontext">it</span><span class="plain"> be that even in heav'nly place</span>
<span class="plain">That busy </span><span class="functiontext">archer</span><span class="plain"> his sharp arrows tries!</span>
<span class="plain">Sure, if </span><span class="functiontext">that</span><span class="plain"> long-with love-acquainted eyes</span>
<span class="plain">Can judge </span><span class="functiontext">of</span><span class="plain"> love, thou feel'st a lover's case,</span>
<span class="plain">I read </span><span class="functiontext">it</span><span class="plain"> in thy looks; thy languish'd grace</span>
<span class="plain">To me, </span><span class="functiontext">that</span><span class="plain"> feel the like, thy state descries.</span>
<span class="plain">Then, ev'n </span><span class="functiontext">of</span><span class="plain"> fellowship, O Moon, tell me,</span>
<span class="plain">Is constant </span><span class="functiontext">love</span><span class="plain"> deem'd there but want of wit?</span>
<span class="plain">Are beauties </span><span class="functiontext">there</span><span class="plain"> as proud as here they be?</span>
<span class="plain">Do they </span><span class="functiontext">above</span><span class="plain"> love to be lov'd, and yet</span>
<span class="plain">Those lovers </span><span class="functiontext">scorn</span><span class="plain"> whom that love doth possess?</span>
<span class="plain">Do they </span><span class="functiontext">call</span><span class="plain"> virtue there ungratefulness?</span>
</pre>
<p class="inwebparagraph"></p>
<p class="inwebparagraph"><a id="SP24"></a><b>&#167;24. </b>Any condition can be reversed by preceding it with <code class="display"><span class="extract">not</span></code>. For example,
</p>
<pre class="display">
<span class="reserved">not</span><span class="plain"> </span><span class="reserved">coloured</span><span class="plain"> </span><span class="element">!string</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!plain</span>
</pre>
<p class="inwebparagraph"></p>
<p class="inwebparagraph"><a id="SP25"></a><b>&#167;25. The three ways rules can take effect. </b>Now let's look at the conclusion Y of a rule. Here the possibilities are
simpler:
</p>
@ -621,9 +961,9 @@ applied to the snippet only if this rule has matched. For example,
<pre class="display">
<span class="identifier">keyword</span><span class="plain"> !</span><span class="element">element</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="reserved">{</span>
<span class="identifier">optionally</span><span class="plain"> </span><span class="identifier">spaced</span><span class="plain"> </span><span class="identifier">prefix</span><span class="plain"> . </span><span class="reserved">=&gt;</span><span class="plain"> !</span><span class="element">element</span>
<span class="identifier">optionally</span><span class="plain"> </span><span class="identifier">spaced</span><span class="plain"> </span><span class="identifier">prefix</span><span class="plain"> -&gt; </span><span class="reserved">=&gt;</span><span class="plain"> !</span><span class="element">element</span>
<span class="reserved">keyword</span><span class="plain"> </span><span class="element">!element</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="reserved">{</span>
<span class="reserved">optionally</span><span class="plain"> </span><span class="reserved">spaced</span><span class="plain"> </span><span class="reserved">prefix</span><span class="plain"> . </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!element</span>
<span class="reserved">optionally</span><span class="plain"> </span><span class="reserved">spaced</span><span class="plain"> </span><span class="reserved">prefix</span><span class="plain"> -&gt; </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!element</span>
<span class="reserved">}</span>
</pre>
@ -637,55 +977,92 @@ the notation <code class="display"><span class="extract">=&gt; C on both</span><
</p>
<p class="inwebparagraph">3. If Y is the word <code class="display"><span class="extract">debug</span></code>, then the current snippet and its colouring
are printed out on the command line.
</p>
<p class="inwebparagraph"><a id="SP25"></a><b>&#167;25. </b>The syntax of ILDs tends to avoid superfluous quotation marks as confusing,
but sometimes you need to be pedantic. If you want to match the text <code class="display"><span class="extract">=&gt;</span></code>,
for example, that could lead to ambiguity with the rule marker <code class="display"><span class="extract">=&gt;</span></code>. For
such occasions, simply put the text in double quotes, and change any literal
double quote in it to <code class="display"><span class="extract">\"</span></code>, and use <code class="display"><span class="extract">\\</span></code> for a literal backslash. For example:
are printed out on the command line. Thus:
</p>
<pre class="display">
<span class="string">"keyword"</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> !</span><span class="element">reserved</span>
<span class="reserved">colouring</span><span class="plain"> </span><span class="reserved">{</span>
<span class="reserved">matches</span><span class="plain"> </span><span class="reserved">of</span><span class="plain"> </span><span class="functiontext">/\d\S+/</span><span class="plain"> </span><span class="reserved">{</span>
<span class="reserved">=&gt;</span><span class="plain"> </span><span class="reserved">debug</span>
<span class="reserved">}</span>
<span class="reserved">}</span>
</pre>
<p class="inwebparagraph"></p>
<p class="inwebparagraph">The rule <code class="display"><span class="extract">=&gt; debug</span></code> is unconditional, and will print whenever it's reached.
</p>
<p class="inwebparagraph"><a id="SP26"></a><b>&#167;26. The worm, Ouroboros. </b>Inweb Language Definition Format is a kind of language in itself, and in
fact Inweb is supplied with an ILD for ILDF itself, which Inweb used to
syntax-colour the examples above. Here it is, as syntax-coloured by itself:
</p>
<pre class="display">
<span class="identifier">Name</span><span class="plain">: </span><span class="identifier">ILDF</span>
<span class="identifier">Details</span><span class="plain">: </span><span class="identifier">The</span><span class="plain"> </span><span class="identifier">Inweb</span><span class="plain"> </span><span class="identifier">Language</span><span class="plain"> </span><span class="identifier">Definition</span><span class="plain"> </span><span class="identifier">File</span><span class="plain"> </span><span class="identifier">format</span>
<span class="identifier">Extension</span><span class="plain">: .</span><span class="identifier">ildf</span>
<span class="identifier">Whole</span><span class="plain"> </span><span class="identifier">Line</span><span class="plain"> </span><span class="identifier">Comment</span><span class="plain">: #</span>
<span class="plain">Name: </span><span class="string">"ILDF"</span>
<span class="plain">Details: </span><span class="string">"The Inweb Language Definition File format"</span>
<span class="plain">Extension: </span><span class="string">".ildf"</span>
<span class="plain">Whole Line Comment: </span><span class="string">"#"</span>
<span class="plain">Supports Namespaces: </span><span class="reserved">false</span>
<span class="identifier">String</span><span class="plain"> </span><span class="identifier">Literal</span><span class="plain">: </span><span class="string">"\""</span>
<span class="identifier">String</span><span class="plain"> </span><span class="identifier">Literal</span><span class="plain"> </span><span class="identifier">Escape</span><span class="plain">: \</span>
<span class="plain">String Literal: </span><span class="string">"\""</span>
<span class="plain">String Literal Escape: </span><span class="string">"\\"</span>
<span class="identifier">keyword</span><span class="plain"> </span><span class="element">unquoted</span><span class="plain"> </span><span class="identifier">of</span><span class="plain"> !</span><span class="element">element</span>
<span class="plain">#</span><span class="comment"> Regular expressions are handled here as if character literals</span>
<span class="plain">Character Literal: </span><span class="string">"/"</span>
<span class="plain">Character Literal Escape: </span><span class="string">"\\"</span>
<span class="identifier">colouring</span><span class="plain"> </span><span class="reserved">{</span>
<span class="plain"> </span><span class="identifier">runs</span><span class="plain"> </span><span class="identifier">of</span><span class="plain"> !</span><span class="element">identifier</span><span class="plain"> </span><span class="reserved">{</span>
<span class="plain"> </span><span class="identifier">prefix</span><span class="plain"> ! </span><span class="reserved">=&gt;</span><span class="plain"> !</span><span class="element">element</span>
<span class="plain"> </span><span class="identifier">keyword</span><span class="plain"> </span><span class="identifier">of</span><span class="plain"> !</span><span class="element">element</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> !</span><span class="element">element</span>
<span class="reserved">keyword</span><span class="plain"> </span><span class="string">"both"</span>
<span class="reserved">keyword</span><span class="plain"> </span><span class="string">"brackets"</span>
<span class="reserved">keyword</span><span class="plain"> </span><span class="string">"characters"</span>
<span class="reserved">keyword</span><span class="plain"> </span><span class="string">"coloured"</span>
<span class="reserved">keyword</span><span class="plain"> </span><span class="string">"colouring"</span>
<span class="reserved">keyword</span><span class="plain"> </span><span class="string">"debug"</span>
<span class="reserved">keyword</span><span class="plain"> </span><span class="string">"false"</span>
<span class="reserved">keyword</span><span class="plain"> </span><span class="string">"in"</span>
<span class="reserved">keyword</span><span class="plain"> </span><span class="string">"instances"</span>
<span class="reserved">keyword</span><span class="plain"> </span><span class="string">"keyword"</span>
<span class="reserved">keyword</span><span class="plain"> </span><span class="string">"matches"</span>
<span class="reserved">keyword</span><span class="plain"> </span><span class="string">"matching"</span>
<span class="reserved">keyword</span><span class="plain"> </span><span class="string">"not"</span>
<span class="reserved">keyword</span><span class="plain"> </span><span class="string">"of"</span>
<span class="reserved">keyword</span><span class="plain"> </span><span class="string">"on"</span>
<span class="reserved">keyword</span><span class="plain"> </span><span class="string">"optionally"</span>
<span class="reserved">keyword</span><span class="plain"> </span><span class="string">"prefix"</span>
<span class="reserved">keyword</span><span class="plain"> </span><span class="string">"runs"</span>
<span class="reserved">keyword</span><span class="plain"> </span><span class="string">"spaced"</span>
<span class="reserved">keyword</span><span class="plain"> </span><span class="string">"suffix"</span>
<span class="reserved">keyword</span><span class="plain"> </span><span class="string">"true"</span>
<span class="reserved">keyword</span><span class="plain"> </span><span class="string">"unquoted"</span><span class="plain"> </span><span class="reserved">of</span><span class="plain"> </span><span class="element">!element</span>
<span class="reserved">colouring</span><span class="plain"> </span><span class="reserved">{</span>
<span class="plain"> </span><span class="reserved">runs</span><span class="plain"> </span><span class="reserved">of</span><span class="plain"> </span><span class="element">!identifier</span><span class="plain"> </span><span class="reserved">{</span>
<span class="plain"> </span><span class="reserved">prefix</span><span class="plain"> </span><span class="string">"!"</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!element</span><span class="plain"> </span><span class="reserved">on</span><span class="plain"> </span><span class="reserved">both</span>
<span class="plain"> </span><span class="reserved">keyword</span><span class="plain"> </span><span class="reserved">of</span><span class="plain"> </span><span class="element">!element</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!element</span>
<span class="plain"> </span><span class="reserved">keyword</span><span class="plain"> </span><span class="reserved">of</span><span class="plain"> </span><span class="element">!reserved</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!reserved</span>
<span class="plain"> </span><span class="reserved">}</span>
<span class="plain"> </span><span class="identifier">runs</span><span class="plain"> </span><span class="identifier">of</span><span class="plain"> </span><span class="element">unquoted</span><span class="plain"> </span><span class="reserved">{</span>
<span class="plain"> </span><span class="identifier">instances</span><span class="plain"> </span><span class="identifier">of</span><span class="plain"> </span><span class="string">"=&gt;"</span><span class="plain"> </span><span class="reserved">{</span>
<span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> !</span><span class="element">reserved</span>
<span class="plain"> </span><span class="reserved">runs</span><span class="plain"> </span><span class="reserved">of</span><span class="plain"> </span><span class="element">unquoted</span><span class="plain"> </span><span class="reserved">{</span>
<span class="plain"> </span><span class="reserved">instances</span><span class="plain"> </span><span class="reserved">of</span><span class="plain"> </span><span class="string">"=&gt;"</span><span class="plain"> </span><span class="reserved">{</span>
<span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!reserved</span>
<span class="plain"> </span><span class="reserved">}</span>
<span class="plain"> </span><span class="identifier">instances</span><span class="plain"> </span><span class="identifier">of</span><span class="plain"> </span><span class="string">"{"</span><span class="plain"> </span><span class="reserved">{</span>
<span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> !</span><span class="element">reserved</span>
<span class="plain"> </span><span class="reserved">instances</span><span class="plain"> </span><span class="reserved">of</span><span class="plain"> </span><span class="string">"{"</span><span class="plain"> </span><span class="reserved">{</span>
<span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!reserved</span>
<span class="plain"> </span><span class="reserved">}</span>
<span class="plain"> </span><span class="identifier">instances</span><span class="plain"> </span><span class="identifier">of</span><span class="plain"> </span><span class="string">"}"</span><span class="plain"> </span><span class="reserved">{</span>
<span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> !</span><span class="element">reserved</span>
<span class="plain"> </span><span class="reserved">instances</span><span class="plain"> </span><span class="reserved">of</span><span class="plain"> </span><span class="string">"}"</span><span class="plain"> </span><span class="reserved">{</span>
<span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!reserved</span>
<span class="plain"> </span><span class="reserved">}</span>
<span class="plain"> </span><span class="reserved">}</span>
<span class="plain"> </span><span class="reserved">characters</span><span class="plain"> </span><span class="reserved">{</span>
<span class="plain"> #</span><span class="comment"> Anything left of these colours will be unquoted strings, so...</span>
<span class="plain"> </span><span class="reserved">coloured</span><span class="plain"> </span><span class="element">!constant</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!string</span>
<span class="plain"> </span><span class="reserved">coloured</span><span class="plain"> </span><span class="element">!identifier</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!string</span>
<span class="plain"> #</span><span class="comment"> Regular expressions, now coloured !character, are more like functions</span>
<span class="plain"> </span><span class="reserved">coloured</span><span class="plain"> </span><span class="element">!character</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!function</span>
<span class="plain"> </span><span class="reserved">}</span>
<span class="plain"> #</span><span class="comment"> Detect Property: Value lines, not being fooled by a colon inside quotes</span>
<span class="plain"> </span><span class="reserved">brackets</span><span class="plain"> </span><span class="reserved">in</span><span class="plain"> </span><span class="functiontext">/\s*([A-Z][^"]*):.*/</span><span class="plain"> </span><span class="reserved">{</span>
<span class="plain"> #</span><span class="comment"> Uncolour only the bracketed part, i.e., the Property part</span>
<span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!plain</span>
<span class="plain"> </span><span class="reserved">}</span>
<span class="reserved">}</span>
</pre>

View file

@ -131,6 +131,7 @@ typedef struct match_results {
int no_matched_texts;
struct match_result exp_storage[MAX_BRACKETED_SUBEXPRESSIONS];
struct text_stream *exp[MAX_BRACKETED_SUBEXPRESSIONS];
int exp_at[MAX_BRACKETED_SUBEXPRESSIONS];
} match_results;
@ Match result objects are inherently ephemeral, and we can expect to be
@ -143,8 +144,10 @@ deallocate.
match_results Regexp::create_mr(void) {
	/* Makes a fresh, empty set of match results: no matched texts, every
	   bracketed-subexpression stream pointer null, and every recorded start
	   position set to -1 (meaning that nothing has been recorded there). */
	match_results mr;
	mr.no_matched_texts = 0;
	/* A single pass over the slots: a second, stale loop header used to sit
	   above this one, which made the body run needlessly many times. */
	for (int i=0; i<MAX_BRACKETED_SUBEXPRESSIONS; i++) {
		mr.exp[i] = NULL;
		mr.exp_at[i] = -1;
	}
	return mr;
}
@ -169,10 +172,27 @@ int Regexp::match(match_results *mr, text_stream *text, wchar_t *pattern) {
return rv;
}
int Regexp::match_from(match_results *mr, text_stream *text, wchar_t *pattern,
	int x, int allow_partial) {
	/* Tries the pattern against the text starting at position x, and returns
	   how far beyond x the position reported by |Regexp::match_r| lies. The
	   result is 0 both when x is at or past the end of the text and when the
	   matcher reports failure (-1). The results holder |mr| is optional. */
	if (x >= Str::len(text)) return 0;

	if (mr) Regexp::prepare(mr);

	match_position start;
	start.tpos = x; start.ppos = 0; start.bc = 0; start.bl = 0;

	int reached = Regexp::match_r(mr, text, pattern, &start, allow_partial);
	if (reached != -1) return reached - x;

	/* No match: release whatever the results holder was prepared with */
	if (mr) Regexp::dispose_of(mr);
	return 0;
}
void Regexp::prepare(match_results *mr) {
if (mr) {
mr->no_matched_texts = 0;
for (int i=0; i<MAX_BRACKETED_SUBEXPRESSIONS; i++) {
mr->exp_at[i] = -1;
if (mr->exp[i]) STREAM_CLOSE(mr->exp[i]);
mr->exp_storage[i].match_text_struct =
Streams::new_buffer(
@ -276,6 +296,7 @@ to implement numeric repetition counts, which we won't need:
Str::clear(mr->exp[i]);
for (int j = at.brackets_start[i]; j <= at.brackets_end[i]; j++)
PUT_TO(mr->exp[i], Str::get_at(text, j));
mr->exp_at[i] = at.brackets_start[i];
}
mr->no_matched_texts = at.bc;
}
@ -290,8 +311,9 @@ says |q|, the only match is with a lower-case letter "q"), except that:
(e) |%i| means any character from the identifier class (see above);
(f) |%p| means any character which can be used in the name of a Preform
nonterminal, which is to say, an identifier character or a hyphen;
(g) |%P| means the same or else a colon;
(h) |%t| means a tab;
(i) |%q| means a double-quote.
|%| otherwise makes a literal escape; a space means any whitespace character;
square brackets enclose literal alternatives, and note as usual with grep
@ -330,9 +352,10 @@ int Regexp::get_cclass(wchar_t *pattern, int ppos, int *len, int *from, int *to,
}
*from = ppos; *to = ppos; return LITERAL_CLASS;
case '[':
*from = ppos+2;
*from = ppos+1;
ppos += 2;
while ((pattern[ppos]) && (pattern[ppos] != ']')) ppos++;
*to = ppos - 1; *len = ppos - *from + 1;
*to = ppos - 1; *len = ppos - *from + 2;
return LITERAL_CLASS;
case ' ':
*len = 1; return WHITESPACE_CLASS;
@ -358,6 +381,9 @@ int Regexp::test_cclass(int c, int chcl, int range_from, int range_to, wchar_t *
((c >= 'a') && (c <= 'z')) ||
((c >= '0') && (c <= '9'))) match = TRUE; break;
case LITERAL_CLASS:
if ((range_to > range_from) && (drawn_from[range_from] == '^')) {
range_from++; reverse = reverse?FALSE:TRUE;
}
for (int j = range_from; j <= range_to; j++) {
int c1 = drawn_from[j], c2 = c1;
if ((j+1 < range_to) && (drawn_from[j+1] == '-')) { c2 = drawn_from[j+2]; j += 2; }