Largely completed baseline version of ILDF
This commit is contained in:
parent
aeb670fc9a
commit
37b01a8d89
29 changed files with 2170 additions and 916 deletions
|
@ -389,7 +389,7 @@ division in the current section.
|
|||
extract_mode = TRUE;
|
||||
} else if ((current_paragraph) && (Regexp::match(&mr2, mr.exp[0], L"%(sample (%c+) code%)"))) {
|
||||
code_lcat_for_body = TEXT_EXTRACT_LCAT;
|
||||
code_pl_for_body = Languages::find_by_name(mr2.exp[0]);
|
||||
code_pl_for_body = Languages::find_by_name(mr2.exp[0], W);
|
||||
extract_mode = TRUE;
|
||||
} else if ((current_paragraph) && (Regexp::match(&mr2, mr.exp[0], L"%(sample code%)"))) {
|
||||
code_lcat_for_body = TEXT_EXTRACT_LCAT;
|
||||
|
|
|
@ -146,11 +146,11 @@ web *Reader::load_web(pathname *P, filename *alt_F, module_search *I, int verbos
|
|||
W->analysed = FALSE;
|
||||
W->as_ebook = NULL;
|
||||
W->redirect_weaves_to = NULL;
|
||||
W->main_language = Languages::default();
|
||||
W->main_language = Languages::default(W);
|
||||
W->no_lines = 0; W->no_paragraphs = 0;
|
||||
text_stream *language_name = Bibliographic::get_datum(W->md, I"Language");
|
||||
if (Str::len(language_name) > 0)
|
||||
W->main_language = Languages::find_by_name(language_name);
|
||||
W->main_language = Languages::find_by_name(language_name, W);
|
||||
main_target = Reader::add_tangle_target(W, W->main_language);
|
||||
|
||||
@<Initialise the rest of the chapter structure@> =
|
||||
|
@ -159,7 +159,7 @@ web *Reader::load_web(pathname *P, filename *alt_F, module_search *I, int verbos
|
|||
C->sections = NEW_LINKED_LIST(section);
|
||||
C->ch_language = W->main_language;
|
||||
if (Str::len(Cm->ch_language_name) > 0)
|
||||
C->ch_language = Languages::find_by_name(Cm->ch_language_name);
|
||||
C->ch_language = Languages::find_by_name(Cm->ch_language_name, W);
|
||||
|
||||
@<Initialise the rest of the section structure@> =
|
||||
S->sect_extent = 0;
|
||||
|
@ -176,10 +176,10 @@ web *Reader::load_web(pathname *P, filename *alt_F, module_search *I, int verbos
|
|||
S->owning_web = W;
|
||||
S->sect_language = C->ch_language;
|
||||
if (Str::len(S->md->sect_language_name) > 0)
|
||||
S->sect_language = Languages::find_by_name(S->md->sect_language_name);
|
||||
S->sect_language = Languages::find_by_name(S->md->sect_language_name, W);
|
||||
if (Str::len(S->md->sect_independent_language) > 0) {
|
||||
programming_language *pl =
|
||||
Languages::find_by_name(S->md->sect_independent_language);
|
||||
Languages::find_by_name(S->md->sect_independent_language, W);
|
||||
S->sect_language = pl;
|
||||
S->sect_target = Reader::add_tangle_target(W, pl);
|
||||
} else {
|
||||
|
|
|
@ -185,7 +185,7 @@ at us; but we don't weave them into the output, that's for sure.
|
|||
} else if (Regexp::match(&mr, figname, L"(%c+) as (%c+)")) {
|
||||
if (S->md->using_syntax < V2_SYNTAX)
|
||||
Parser::wrong_version(S->md->using_syntax, L, "[[F as L]]", V2_SYNTAX);
|
||||
programming_language *pl = Languages::find_by_name(mr.exp[1]);
|
||||
programming_language *pl = Languages::find_by_name(mr.exp[1], W);
|
||||
Formats::figure(OUT, wv, mr.exp[0], -1, -1, pl);
|
||||
} else {
|
||||
Formats::figure(OUT, wv, figname, -1, -1, NULL);
|
||||
|
|
|
@ -7,7 +7,7 @@ definitions from files.
|
|||
Programming languages are identified by name: for example, |C++| or |Perl|.
|
||||
|
||||
@ =
|
||||
programming_language *Languages::find_by_name(text_stream *lname) {
|
||||
programming_language *Languages::find_by_name(text_stream *lname, web *W) {
|
||||
programming_language *pl;
|
||||
@<If this is the name of a language already known, return that@>;
|
||||
@<Read the language definition file with this name@>;
|
||||
|
@ -23,21 +23,32 @@ programming_language *Languages::find_by_name(text_stream *lname) {
|
|||
return pl;
|
||||
|
||||
@<Read the language definition file with this name@> =
|
||||
filename *F = NULL;
|
||||
if (W) {
|
||||
pathname *P = Pathnames::subfolder(W->md->path_to_web, I"Private Languages");
|
||||
@<Try P@>;
|
||||
}
|
||||
pathname *P = Languages::default_directory();
|
||||
TEMPORARY_TEXT(leaf);
|
||||
WRITE_TO(leaf, "%S.ildf", lname);
|
||||
filename *F = Filenames::in_folder(P, leaf);
|
||||
DISCARD_TEXT(leaf);
|
||||
if (TextFiles::exists(F) == FALSE)
|
||||
@<Try P@>;
|
||||
if (F == NULL)
|
||||
Errors::fatal_with_text(
|
||||
"unsupported programming language '%S'", lname);
|
||||
pl = Languages::read_definition(F);
|
||||
|
||||
@<Try P@> =
|
||||
if (F == NULL) {
|
||||
TEMPORARY_TEXT(leaf);
|
||||
WRITE_TO(leaf, "%S.ildf", lname);
|
||||
F = Filenames::in_folder(P, leaf);
|
||||
DISCARD_TEXT(leaf);
|
||||
if (TextFiles::exists(F) == FALSE) F = NULL;
|
||||
}
|
||||
|
||||
@ I'm probably showing my age here.
|
||||
|
||||
=
|
||||
programming_language *Languages::default(void) {
|
||||
return Languages::find_by_name(I"C");
|
||||
programming_language *Languages::default(web *W) {
|
||||
return Languages::find_by_name(I"C", W);
|
||||
}
|
||||
|
||||
void Languages::show(OUTPUT_STREAM) {
|
||||
|
@ -210,9 +221,9 @@ declare a reserved keyword, or set a key to a value.
|
|||
pl->program = Languages::new_block(NULL, WHOLE_LINE_CRULE_RUN);
|
||||
state->current_block = pl->program;
|
||||
} else if (Regexp::match(&mr, line, L"keyword (%C+) of (%c+?)")) {
|
||||
Languages::reserved(pl, mr.exp[0], Languages::colour(mr.exp[1], tfp), tfp);
|
||||
Languages::reserved(pl, Languages::text(mr.exp[0], tfp, FALSE), Languages::colour(mr.exp[1], tfp), tfp);
|
||||
} else if (Regexp::match(&mr, line, L"keyword (%C+)")) {
|
||||
Languages::reserved(pl, mr.exp[0], RESERVED_COLOUR, tfp);
|
||||
Languages::reserved(pl, Languages::text(mr.exp[0], tfp, FALSE), RESERVED_COLOUR, tfp);
|
||||
} else if (Regexp::match(&mr, line, L"(%c+) *: *(%c+?)")) {
|
||||
text_stream *key = mr.exp[0], *value = Str::duplicate(mr.exp[1]);
|
||||
if (Str::eq(key, I"Name")) pl->language_name = Languages::text(value, tfp, TRUE);
|
||||
|
@ -291,6 +302,12 @@ runs of a given colour, or give an if-X-then-Y rule:
|
|||
rule->execute_block =
|
||||
Languages::new_block(state->current_block, CHARACTERS_CRULE_RUN);
|
||||
state->current_block = rule->execute_block;
|
||||
} else if (Regexp::match(&mr, line, L"characters in (%c+) {")) {
|
||||
colouring_rule *rule = Languages::new_rule(state->current_block);
|
||||
rule->execute_block =
|
||||
Languages::new_block(state->current_block, CHARACTERS_IN_CRULE_RUN);
|
||||
rule->execute_block->char_set = Languages::text(mr.exp[0], tfp, FALSE);
|
||||
state->current_block = rule->execute_block;
|
||||
} else if (Regexp::match(&mr, line, L"runs of (%c+) {")) {
|
||||
colouring_rule *rule = Languages::new_rule(state->current_block);
|
||||
int r = UNQUOTED_COLOUR;
|
||||
|
@ -302,6 +319,16 @@ runs of a given colour, or give an if-X-then-Y rule:
|
|||
rule->execute_block = Languages::new_block(state->current_block, INSTANCES_CRULE_RUN);
|
||||
rule->execute_block->run_instance = Languages::text(mr.exp[0], tfp, FALSE);
|
||||
state->current_block = rule->execute_block;
|
||||
} else if (Regexp::match(&mr, line, L"matches of (%c+) {")) {
|
||||
colouring_rule *rule = Languages::new_rule(state->current_block);
|
||||
rule->execute_block = Languages::new_block(state->current_block, MATCHES_CRULE_RUN);
|
||||
Languages::regexp(rule->execute_block->match_regexp_text, mr.exp[0], tfp);
|
||||
state->current_block = rule->execute_block;
|
||||
} else if (Regexp::match(&mr, line, L"brackets in (%c+) {")) {
|
||||
colouring_rule *rule = Languages::new_rule(state->current_block);
|
||||
rule->execute_block = Languages::new_block(state->current_block, BRACKETS_CRULE_RUN);
|
||||
Languages::regexp(rule->execute_block->match_regexp_text, mr.exp[0], tfp);
|
||||
state->current_block = rule->execute_block;
|
||||
} else {
|
||||
int at = -1, quoted = FALSE;
|
||||
for (int i=0; i<Str::len(line)-1; i++) {
|
||||
|
@ -329,7 +356,10 @@ represents a complete program.
|
|||
|
||||
@d WHOLE_LINE_CRULE_RUN -1 /* This block applies to the whole snippet being coloured */
|
||||
@d CHARACTERS_CRULE_RUN -2 /* This block applies to each character in turn */
|
||||
@d INSTANCES_CRULE_RUN -3 /* This block applies to each instance in turn */
|
||||
@d CHARACTERS_IN_CRULE_RUN -3 /* This block applies to each character from a set in turn */
|
||||
@d INSTANCES_CRULE_RUN -4 /* This block applies to each instance in turn */
|
||||
@d MATCHES_CRULE_RUN -5 /* This block applies to each match against a regexp in turn */
|
||||
@d BRACKETS_CRULE_RUN -6 /* This block applies to bracketed subexpressions in a regexp */
|
||||
|
||||
=
|
||||
typedef struct colouring_language_block {
|
||||
|
@ -337,6 +367,11 @@ typedef struct colouring_language_block {
|
|||
struct colouring_language_block *parent; /* or |NULL| for the topmost one */
|
||||
int run; /* one of the |*_CRULE_RUN| values, or else a colour */
|
||||
struct text_stream *run_instance; /* used only for |INSTANCES_CRULE_RUN| */
|
||||
struct text_stream *char_set; /* used only for |CHARACTERS_IN_CRULE_RUN| */
|
||||
wchar_t match_regexp_text[MAX_ILDF_REGEXP_LENGTH]; /* used for |MATCHES_CRULE_RUN|, |BRACKETS_CRULE_RUN| */
|
||||
|
||||
/* workspace during painting */
|
||||
struct match_results mr; /* of a regular expression */
|
||||
MEMORY_MANAGEMENT
|
||||
} colouring_language_block;
|
||||
|
||||
|
@ -347,6 +382,9 @@ colouring_language_block *Languages::new_block(colouring_language_block *within,
|
|||
block->parent = within;
|
||||
block->run = r;
|
||||
block->run_instance = NULL;
|
||||
block->char_set = NULL;
|
||||
block->match_regexp_text[0] = 0;
|
||||
block->mr = Regexp::create_mr();
|
||||
return block;
|
||||
}
|
||||
|
||||
|
@ -365,13 +403,18 @@ Note that rules can be unconditional, in that the premiss always passes.
|
|||
@d SPACED_RULE_SUFFIX 6 /* for |spaced suffix P| */
|
||||
@d OPTIONALLY_SPACED_RULE_SUFFIX 7 /* for |optionally spaced suffix P| */
|
||||
|
||||
@d MAX_ILDF_REGEXP_LENGTH 64
|
||||
|
||||
=
|
||||
typedef struct colouring_rule {
|
||||
/* the premiss: */
|
||||
int match_colour; /* for |colour C|, or else |NOT_A_COLOUR| */
|
||||
int sense; /* |FALSE| to negate the condition */
|
||||
int match_colour; /* for |coloured C|, or else |NOT_A_COLOUR| */
|
||||
int match_keyword_of_colour; /* for |keyword C|, or else |NOT_A_COLOUR| */
|
||||
struct text_stream *match_text; /* or length 0 to mean "anything" */
|
||||
int match_prefix; /* one of the |*_RULE_PREFIX| values above */
|
||||
wchar_t match_regexp_text[MAX_ILDF_REGEXP_LENGTH];
|
||||
int number; /* for |number N| rules; 0 for others */
|
||||
|
||||
/* the conclusion: */
|
||||
struct colouring_language_block *execute_block; /* or |NULL|, in which case... */
|
||||
|
@ -381,6 +424,7 @@ typedef struct colouring_rule {
|
|||
|
||||
/* workspace during painting */
|
||||
int fix_position; /* where the prefix or suffix started */
|
||||
struct match_results mr; /* of a regular expression */
|
||||
MEMORY_MANAGEMENT
|
||||
} colouring_rule;
|
||||
|
||||
|
@ -389,15 +433,21 @@ colouring_rule *Languages::new_rule(colouring_language_block *within) {
|
|||
if (within == NULL) internal_error("rule outside block");
|
||||
colouring_rule *rule = CREATE(colouring_rule);
|
||||
ADD_TO_LINKED_LIST(rule, colouring_rule, within->rules);
|
||||
rule->sense = TRUE;
|
||||
rule->match_colour = NOT_A_COLOUR;
|
||||
rule->match_text = NULL;
|
||||
rule->match_prefix = NOT_A_RULE_PREFIX;
|
||||
rule->match_keyword_of_colour = NOT_A_COLOUR;
|
||||
rule->match_regexp_text[0] = 0;
|
||||
rule->number = 0;
|
||||
|
||||
rule->set_to_colour = NOT_A_COLOUR;
|
||||
rule->set_prefix_to_colour = NOT_A_COLOUR;
|
||||
rule->execute_block = NULL;
|
||||
rule->debug = FALSE;
|
||||
|
||||
rule->fix_position = 0;
|
||||
rule->mr = Regexp::create_mr();
|
||||
return rule;
|
||||
}
|
||||
|
||||
|
@ -413,13 +463,21 @@ void Languages::parse_rule(language_reader_state *state, text_stream *premiss,
|
|||
}
|
||||
|
||||
@<Parse the premiss@> =
|
||||
if (Regexp::match(&mr, premiss, L"keyword of (%c+)")) {
|
||||
while (Regexp::match(&mr, premiss, L"not (%c+)")) {
|
||||
rule->sense = (rule->sense)?FALSE:TRUE;
|
||||
Str::clear(premiss); Str::copy(premiss, mr.exp[0]);
|
||||
}
|
||||
if (Regexp::match(&mr, premiss, L"number (%d+)")) {
|
||||
rule->number = Str::atoi(mr.exp[0], 0);
|
||||
} else if (Regexp::match(&mr, premiss, L"keyword of (%c+)")) {
|
||||
rule->match_keyword_of_colour = Languages::colour(mr.exp[0], tfp);
|
||||
} else if (Regexp::match(&mr, premiss, L"keyword")) {
|
||||
Errors::in_text_file("ambiguous: make it keyword of !reserved or \"keyword\"", tfp);
|
||||
} else if (Regexp::match(&mr, premiss, L"prefix (%c+)")) {
|
||||
rule->match_prefix = UNSPACED_RULE_PREFIX;
|
||||
rule->match_text = Languages::text(mr.exp[0], tfp, FALSE);
|
||||
} else if (Regexp::match(&mr, premiss, L"matching (%c+)")) {
|
||||
Languages::regexp(rule->match_regexp_text, mr.exp[0], tfp);
|
||||
} else if (Regexp::match(&mr, premiss, L"spaced prefix (%c+)")) {
|
||||
rule->match_prefix = SPACED_RULE_PREFIX;
|
||||
rule->match_text = Languages::text(mr.exp[0], tfp, FALSE);
|
||||
|
@ -435,7 +493,7 @@ void Languages::parse_rule(language_reader_state *state, text_stream *premiss,
|
|||
} else if (Regexp::match(&mr, premiss, L"optionally spaced suffix (%c+)")) {
|
||||
rule->match_prefix = OPTIONALLY_SPACED_RULE_SUFFIX;
|
||||
rule->match_text = Languages::text(mr.exp[0], tfp, FALSE);
|
||||
} else if (Regexp::match(&mr, premiss, L"colou*r (%c+)")) {
|
||||
} else if (Regexp::match(&mr, premiss, L"coloured (%c+)")) {
|
||||
rule->match_colour = Languages::colour(mr.exp[0], tfp);
|
||||
} else if (Str::len(premiss) > 0) {
|
||||
rule->match_text = Languages::text(premiss, tfp, FALSE);
|
||||
|
@ -575,6 +633,12 @@ text_stream *Languages::text(text_stream *T, text_file_position *tfp, int allow)
|
|||
} else if ((bareword == FALSE) && (c == '"')) {
|
||||
Errors::in_text_file(
|
||||
"backslash needed before internal double-quotation mark", tfp);
|
||||
} else if ((bareword) && (c == '!') && (i == from)) {
|
||||
Errors::in_text_file(
|
||||
"a literal starting with ! must be in double-quotation marks", tfp);
|
||||
} else if ((bareword) && (c == '/')) {
|
||||
Errors::in_text_file(
|
||||
"forward slashes can only be used in quoted strings", tfp);
|
||||
} else if ((bareword) && (c == '"')) {
|
||||
Errors::in_text_file(
|
||||
"double-quotation marks can only be used in quoted strings", tfp);
|
||||
|
@ -588,6 +652,102 @@ text_stream *Languages::text(text_stream *T, text_file_position *tfp, int allow)
|
|||
Errors::in_text_file_S(err, tfp);
|
||||
DISCARD_TEXT(err);
|
||||
}
|
||||
if (bareword) {
|
||||
int rw = FALSE;
|
||||
if (Str::eq(V, I"both")) rw = TRUE;
|
||||
if (Str::eq(V, I"brackets")) rw = TRUE;
|
||||
if (Str::eq(V, I"characters")) rw = TRUE;
|
||||
if (Str::eq(V, I"coloured")) rw = TRUE;
|
||||
if (Str::eq(V, I"colouring")) rw = TRUE;
|
||||
if (Str::eq(V, I"debug")) rw = TRUE;
|
||||
if (Str::eq(V, I"false")) rw = TRUE;
|
||||
if (Str::eq(V, I"in")) rw = TRUE;
|
||||
if (Str::eq(V, I"instances")) rw = TRUE;
|
||||
if (Str::eq(V, I"keyword")) rw = TRUE;
|
||||
if (Str::eq(V, I"matches")) rw = TRUE;
|
||||
if (Str::eq(V, I"matching")) rw = TRUE;
|
||||
if (Str::eq(V, I"not")) rw = TRUE;
|
||||
if (Str::eq(V, I"of")) rw = TRUE;
|
||||
if (Str::eq(V, I"on")) rw = TRUE;
|
||||
if (Str::eq(V, I"optionally")) rw = TRUE;
|
||||
if (Str::eq(V, I"prefix")) rw = TRUE;
|
||||
if (Str::eq(V, I"runs")) rw = TRUE;
|
||||
if (Str::eq(V, I"spaced")) rw = TRUE;
|
||||
if (Str::eq(V, I"suffix")) rw = TRUE;
|
||||
if (Str::eq(V, I"true")) rw = TRUE;
|
||||
if (Str::eq(V, I"unquoted")) rw = TRUE;
|
||||
|
||||
if (rw) {
|
||||
TEMPORARY_TEXT(err);
|
||||
WRITE_TO(err, "'%S' is a reserved word, so you should put it in double-quotation marks", V);
|
||||
Errors::in_text_file_S(err, tfp);
|
||||
DISCARD_TEXT(err);
|
||||
}
|
||||
}
|
||||
}
|
||||
return V;
|
||||
}
|
||||
|
||||
@ And regular expressions.
|
||||
|
||||
=
|
||||
/* Translate the slash-delimited regular expression in |T|, as written in a
language definition file, into the wide-character pattern buffer |write_to|.
The buffer is assumed to hold |MAX_ILDF_REGEXP_LENGTH| elements, as the
|match_regexp_text| arrays passed by callers do. Problems are reported against
the file position |tfp|.

The translation performed is:
(a) |\\| becomes a single literal backslash character;
(b) |\d| and |\t| become |%d| and |%t|;
(c) |\s| becomes a literal space;
(d) |\S| becomes |%C|, and |\"| becomes |%q|;
(e) any other |\X| becomes |%X|;
(f) an unescaped |.| becomes |%c|, and a literal |%| becomes |%%|;
(g) every other character is copied unchanged.

The result is always null-terminated. If |T| is empty or not enclosed in
slashes, the buffer is left as the empty string; if the translation would not
fit, an error is issued and the buffer holds a truncated but still terminated
pattern. */
void Languages::regexp(wchar_t *write_to, text_stream *T, text_file_position *tfp) {
	if (write_to == NULL) internal_error("no buffer");
	write_to[0] = 0;
	if (Str::len(T) > 0) {
		int from = 0, to = Str::len(T)-1, x = 0;
		if ((to > from) &&
			(Str::get_at(T, from) == '/') && (Str::get_at(T, to) == '/')) {
			from++; to--; /* step inside the enclosing slashes */
			for (int i=from; i<=to; i++) {
				wchar_t c = Str::get_at(T, i);
				if (c == '\\') {
					/* NOTE(review): if the backslash is the last character before
					the closing slash, |w| is that closing slash -- confirm this
					is acceptable */
					wchar_t w = Str::get_at(T, i+1);
					if (w == '\\') {
						x = Languages::add_to_regexp(write_to, x, w);
					} else if (w == 'd') {
						x = Languages::add_escape_to_regexp(write_to, x, 'd');
					} else if (w == 't') {
						x = Languages::add_escape_to_regexp(write_to, x, 't');
					} else if (w == 's') {
						x = Languages::add_to_regexp(write_to, x, ' ');
					} else if (w == 'S') {
						x = Languages::add_escape_to_regexp(write_to, x, 'C');
					} else if (w == '"') {
						x = Languages::add_escape_to_regexp(write_to, x, 'q');
					} else {
						x = Languages::add_escape_to_regexp(write_to, x, w);
					}
					i++; /* the escaped character has been consumed too */
					continue;
				}
				if (c == '.') {
					x = Languages::add_escape_to_regexp(write_to, x, 'c');
					continue;
				}
				if (c == '%') {
					x = Languages::add_escape_to_regexp(write_to, x, '%');
					continue;
				}
				x = Languages::add_to_regexp(write_to, x, c);
			}
		} else {
			Errors::in_text_file(
				"the expression to match must be in slashes '/'", tfp);
		}
		if (x >= MAX_ILDF_REGEXP_LENGTH) {
			Errors::in_text_file(
				"the expression to match is too long", tfp);
			/* every slot was used, so give up the last one to the terminator */
			x = MAX_ILDF_REGEXP_LENGTH - 1;
		}
		/* the helpers never write a terminator, so it must be done here:
		otherwise anything reading the pattern could run off the end */
		write_to[x] = 0;
	}
}
|
||||
|
||||
/* Store the character |c| at index |i| of the pattern buffer |write_to|,
provided that index lies below |MAX_ILDF_REGEXP_LENGTH|, and return the index
at which the next character should go. When the index is already at the limit
nothing is written and |i| is returned unchanged. No terminator is written. */
int Languages::add_to_regexp(wchar_t *write_to, int i, wchar_t c) {
	if (i >= MAX_ILDF_REGEXP_LENGTH) return i;
	write_to[i] = c;
	return i + 1;
}
|
||||
|
||||
/* Append the two-character sequence |%| followed by |c| to the pattern buffer
|write_to|, starting at index |i|, and return the index following whatever
was written. Each character is subject to the same length limit as in
|Languages::add_to_regexp|. */
int Languages::add_escape_to_regexp(wchar_t *write_to, int i, wchar_t c) {
	int after_percent = Languages::add_to_regexp(write_to, i, '%');
	return Languages::add_to_regexp(write_to, after_percent, c);
}
|
||||
|
|
|
@ -207,24 +207,53 @@ void Painter::execute(hash_table *HT, colouring_language_block *block, text_stre
|
|||
LOOP_OVER_LINKED_LIST(rule, colouring_rule, block->rules) {
|
||||
switch (block->run) {
|
||||
case WHOLE_LINE_CRULE_RUN:
|
||||
Painter::execute_rule(HT, rule, matter, colouring, from, to);
|
||||
Painter::execute_rule(HT, rule, matter, colouring, from, to, 1);
|
||||
break;
|
||||
case CHARACTERS_CRULE_RUN:
|
||||
for (int i=from; i<=to; i++)
|
||||
Painter::execute_rule(HT, rule, matter, colouring, i, i);
|
||||
Painter::execute_rule(HT, rule, matter, colouring, i, i, i-from+1);
|
||||
break;
|
||||
case CHARACTERS_IN_CRULE_RUN:
|
||||
for (int count=1, i=from; i<=to; i++)
|
||||
for (int j=0; j<Str::len(block->char_set); j++)
|
||||
if (Str::get_at(matter, i) == Str::get_at(block->char_set, j) ) {
|
||||
Painter::execute_rule(HT, rule, matter, colouring, i, i, count++);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case INSTANCES_CRULE_RUN: {
|
||||
int L = Str::len(block->run_instance) - 1;
|
||||
if (L >= 0)
|
||||
for (int i=from; i<=to - L; i++)
|
||||
for (int count=1, i=from; i<=to - L; i++)
|
||||
if (ACMESupport::text_at(matter, i, block->run_instance)) {
|
||||
Painter::execute_rule(HT, rule, matter, colouring, i, i+L);
|
||||
Painter::execute_rule(HT, rule, matter, colouring, i, i+L, count++);
|
||||
i += L;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case MATCHES_CRULE_RUN:
|
||||
for (int count=1, i=from; i<=to; i++) {
|
||||
int L = Regexp::match_from(&(block->mr), matter, block->match_regexp_text, i, TRUE);
|
||||
if (L > 0) {
|
||||
Painter::execute_rule(HT, rule, matter, colouring, i, i+L-1, count++);
|
||||
i += L-1;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case BRACKETS_CRULE_RUN:
|
||||
for (int i=0; i<MAX_BRACKETED_SUBEXPRESSIONS; i++)
|
||||
if (block->mr.exp[i])
|
||||
Str::clear(block->mr.exp[i]);
|
||||
if (Regexp::match(&(block->mr), matter, block->match_regexp_text))
|
||||
for (int count=1, i=0; i<MAX_BRACKETED_SUBEXPRESSIONS; i++)
|
||||
if (block->mr.exp_at[i] >= 0)
|
||||
Painter::execute_rule(HT, rule, matter, colouring,
|
||||
block->mr.exp_at[i],
|
||||
block->mr.exp_at[i] + Str::len(block->mr.exp[i])-1,
|
||||
count++);
|
||||
break;
|
||||
default: {
|
||||
int ident_from = -1;
|
||||
int ident_from = -1, count = 1;
|
||||
for (int i=from; i<=to; i++) {
|
||||
int col = Str::get_at(colouring_at_start, i);
|
||||
if ((col == block->run) ||
|
||||
|
@ -233,12 +262,12 @@ void Painter::execute(hash_table *HT, colouring_language_block *block, text_stre
|
|||
if (ident_from == -1) ident_from = i;
|
||||
} else {
|
||||
if (ident_from >= 0)
|
||||
Painter::execute_rule(HT, rule, matter, colouring, ident_from, i-1);
|
||||
Painter::execute_rule(HT, rule, matter, colouring, ident_from, i-1, count++);
|
||||
ident_from = -1;
|
||||
}
|
||||
}
|
||||
if (ident_from >= 0)
|
||||
Painter::execute_rule(HT, rule, matter, colouring, ident_from, to);
|
||||
Painter::execute_rule(HT, rule, matter, colouring, ident_from, to, count++);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -250,8 +279,8 @@ void Painter::execute(hash_table *HT, colouring_language_block *block, text_stre
|
|||
|
||||
=
|
||||
void Painter::execute_rule(hash_table *HT, colouring_rule *rule, text_stream *matter,
|
||||
text_stream *colouring, int from, int to) {
|
||||
if (Painter::satisfies(HT, rule, matter, colouring, from, to))
|
||||
text_stream *colouring, int from, int to, int N) {
|
||||
if (Painter::satisfies(HT, rule, matter, colouring, from, to, N) == rule->sense)
|
||||
Painter::follow(HT, rule, matter, colouring, from, to);
|
||||
}
|
||||
|
||||
|
@ -267,8 +296,13 @@ void Painter::execute_rule(hash_table *HT, colouring_rule *rule, text_stream *ma
|
|||
|
||||
=
|
||||
int Painter::satisfies(hash_table *HT, colouring_rule *rule, text_stream *matter,
|
||||
text_stream *colouring, int from, int to) {
|
||||
if (Str::len(rule->match_text) > 0) {
|
||||
text_stream *colouring, int from, int to, int N) {
|
||||
if (rule->number > 0) {
|
||||
if (rule->number != N) return FALSE;
|
||||
} else if (rule->match_regexp_text[0]) {
|
||||
if (Regexp::match(&(rule->mr), matter, rule->match_regexp_text) == FALSE)
|
||||
return FALSE;
|
||||
} else if (Str::len(rule->match_text) > 0) {
|
||||
if ((rule->match_prefix == UNSPACED_RULE_PREFIX) ||
|
||||
(rule->match_prefix == SPACED_RULE_PREFIX) ||
|
||||
(rule->match_prefix == OPTIONALLY_SPACED_RULE_PREFIX)) {
|
||||
|
@ -295,7 +329,11 @@ int Painter::satisfies(hash_table *HT, colouring_rule *rule, text_stream *matter
|
|||
return FALSE;
|
||||
rule->fix_position = pos;
|
||||
} else {
|
||||
if (Str::ne(matter, rule->match_text)) return FALSE;
|
||||
if (Str::len(rule->match_text) != to-from+1)
|
||||
return FALSE;
|
||||
for (int i=from; i<=to; i++)
|
||||
if (Str::get_at(matter, i) != Str::get_at(rule->match_text, i-from))
|
||||
return FALSE;
|
||||
}
|
||||
} else if (rule->match_keyword_of_colour != NOT_A_COLOUR) {
|
||||
TEMPORARY_TEXT(id);
|
||||
|
|
|
@ -1,14 +1,14 @@
|
|||
Name: ACME
|
||||
Details: The ACME assembly language for 6502 and related CPUs
|
||||
Extension: .a
|
||||
Line Comment: ;
|
||||
Name: "ACME"
|
||||
Details: "The ACME assembly language for 6502 and related CPUs"
|
||||
Extension: ".a"
|
||||
Line Comment: ";"
|
||||
String Literal: "\""
|
||||
String Literal Escape: \
|
||||
Character Literal: '
|
||||
Character Literal Escape: \
|
||||
Binary Literal Prefix: %
|
||||
Hexadecimal Literal Prefix: $
|
||||
Negative Literal Prefix: -
|
||||
String Literal Escape: "\\"
|
||||
Character Literal: "'"
|
||||
Character Literal Escape: "\\"
|
||||
Binary Literal Prefix: "%"
|
||||
Hexadecimal Literal Prefix: "$"
|
||||
Negative Literal Prefix: "-"
|
||||
|
||||
colouring {
|
||||
runs of unquoted {
|
||||
|
|
|
@ -1,34 +1,34 @@
|
|||
Name: C++
|
||||
Details: The C++ programming language
|
||||
Extension: .cpp
|
||||
Multiline Comment Open: /*
|
||||
Multiline Comment Close: */
|
||||
Line Comment: //
|
||||
Name: "C++"
|
||||
Details: "The C++ programming language"
|
||||
Extension: ".cpp"
|
||||
Multiline Comment Open: "/*"
|
||||
Multiline Comment Close: "*/"
|
||||
Line Comment: "//"
|
||||
String Literal: "\""
|
||||
String Literal Escape: \
|
||||
Character Literal: '
|
||||
Character Literal Escape: \
|
||||
String Literal Escape: "\\"
|
||||
Character Literal: "'"
|
||||
Character Literal Escape: "\\"
|
||||
C-Like: true
|
||||
|
||||
# C++ does in fact support octal literals, marking them as starting with an
|
||||
# unnecessary initial zero. This is practically obsolete now, and in any case
|
||||
# makes no difference to syntax-colouring.
|
||||
|
||||
Hexadecimal Literal Prefix: 0x
|
||||
Binary Literal Prefix: 0b
|
||||
Negative Literal Prefix: -
|
||||
Hexadecimal Literal Prefix: "0x"
|
||||
Binary Literal Prefix: "0b"
|
||||
Negative Literal Prefix: "-"
|
||||
|
||||
Before Named Paragraph Expansion: \n{\n
|
||||
After Named Paragraph Expansion: }\n
|
||||
Start Ifdef: #ifdef %S\n
|
||||
End Ifdef: #endif /* %S */\n
|
||||
Start Ifndef: #ifndef %S\n
|
||||
End Ifndef: #endif /* %S */\n
|
||||
Before Named Paragraph Expansion: "\n{\n"
|
||||
After Named Paragraph Expansion: "}\n"
|
||||
Start Ifdef: "#ifdef %S\n"
|
||||
End Ifdef: "#endif /* %S */\n"
|
||||
Start Ifndef: "#ifndef %S\n"
|
||||
End Ifndef: "#endif /* %S */\n"
|
||||
Line Marker: "#line %d \"%f\"\n"
|
||||
|
||||
Start Definition: #define %S\s
|
||||
Prolong Definition: \\\n\s\s\s\s
|
||||
End Definition: \n
|
||||
Start Definition: "#define %S\s"
|
||||
Prolong Definition: "\\\n\s\s\s\s"
|
||||
End Definition: "\n"
|
||||
|
||||
keyword auto
|
||||
keyword break
|
||||
|
|
|
@ -1,13 +1,13 @@
|
|||
Name: C
|
||||
Details: The C programming language
|
||||
Extension: .c
|
||||
Multiline Comment Open: /*
|
||||
Multiline Comment Close: */
|
||||
Line Comment: //
|
||||
Name: "C"
|
||||
Details: "The C programming language"
|
||||
Extension: ".c"
|
||||
Multiline Comment Open: "/*"
|
||||
Multiline Comment Close: "*/"
|
||||
Line Comment: "//"
|
||||
String Literal: "\""
|
||||
String Literal Escape: \
|
||||
Character Literal: '
|
||||
Character Literal Escape: \
|
||||
String Literal Escape: "\\"
|
||||
Character Literal: "'"
|
||||
Character Literal Escape: "\\"
|
||||
C-Like: true
|
||||
|
||||
# C does in fact support octal literals, marking them as starting with an
|
||||
|
@ -16,21 +16,21 @@ C-Like: true
|
|||
# rejected by the C standards body as useless, but are so useful that gcc
|
||||
# and clang support them anyway.
|
||||
|
||||
Hexadecimal Literal Prefix: 0x
|
||||
Binary Literal Prefix: 0b
|
||||
Negative Literal Prefix: -
|
||||
Hexadecimal Literal Prefix: "0x"
|
||||
Binary Literal Prefix: "0b"
|
||||
Negative Literal Prefix: "-"
|
||||
|
||||
Before Named Paragraph Expansion: \n{\n
|
||||
After Named Paragraph Expansion: }\n
|
||||
Start Ifdef: #ifdef %S\n
|
||||
End Ifdef: #endif /* %S */\n
|
||||
Start Ifndef: #ifndef %S\n
|
||||
End Ifndef: #endif /* %S */\n
|
||||
Before Named Paragraph Expansion: "\n{\n"
|
||||
After Named Paragraph Expansion: "}\n"
|
||||
Start Ifdef: "#ifdef %S\n"
|
||||
End Ifdef: "#endif /* %S */\n"
|
||||
Start Ifndef: "#ifndef %S\n"
|
||||
End Ifndef: "#endif /* %S */\n"
|
||||
Line Marker: "#line %d \"%f\"\n"
|
||||
|
||||
Start Definition: #define %S\s
|
||||
Prolong Definition: \\\n\s\s\s\s
|
||||
End Definition: \n
|
||||
Start Definition: "#define %S\s"
|
||||
Prolong Definition: "\\\n\s\s\s\s"
|
||||
End Definition: "\n"
|
||||
|
||||
keyword auto
|
||||
keyword break
|
||||
|
|
|
@ -1,17 +1,44 @@
|
|||
Name: ILDF
|
||||
Details: The Inweb Language Definition File format
|
||||
Extension: .ildf
|
||||
Whole Line Comment: #
|
||||
Name: "ILDF"
|
||||
Details: "The Inweb Language Definition File format"
|
||||
Extension: ".ildf"
|
||||
Whole Line Comment: "#"
|
||||
Supports Namespaces: false
|
||||
|
||||
String Literal: "\""
|
||||
String Literal Escape: \
|
||||
String Literal Escape: "\\"
|
||||
|
||||
keyword unquoted of !element
|
||||
# Regular expressions are handled here as if character literals
|
||||
Character Literal: "/"
|
||||
Character Literal Escape: "\\"
|
||||
|
||||
keyword "both"
|
||||
keyword "brackets"
|
||||
keyword "characters"
|
||||
keyword "coloured"
|
||||
keyword "colouring"
|
||||
keyword "debug"
|
||||
keyword "false"
|
||||
keyword "in"
|
||||
keyword "instances"
|
||||
keyword "keyword"
|
||||
keyword "matches"
|
||||
keyword "matching"
|
||||
keyword "not"
|
||||
keyword "of"
|
||||
keyword "on"
|
||||
keyword "optionally"
|
||||
keyword "prefix"
|
||||
keyword "runs"
|
||||
keyword "spaced"
|
||||
keyword "suffix"
|
||||
keyword "true"
|
||||
keyword "unquoted" of !element
|
||||
|
||||
colouring {
|
||||
runs of !identifier {
|
||||
prefix ! => !element
|
||||
prefix "!" => !element on both
|
||||
keyword of !element => !element
|
||||
keyword of !reserved => !reserved
|
||||
}
|
||||
runs of unquoted {
|
||||
instances of "=>" {
|
||||
|
@ -24,4 +51,16 @@ colouring {
|
|||
=> !reserved
|
||||
}
|
||||
}
|
||||
characters {
|
||||
# Anything left of these colours will be unquoted strings, so...
|
||||
coloured !constant => !string
|
||||
coloured !identifier => !string
|
||||
# Regular expressions, now coloured !character, are more like functions
|
||||
coloured !character => !function
|
||||
}
|
||||
# Detect Property: Value lines, not being fooled by a colon inside quotes
|
||||
brackets in /\s*([A-Z][^"]*):.*/ {
|
||||
# Uncolour only the bracketed part, i.e., the Property part
|
||||
=> !plain
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,14 +1,14 @@
|
|||
Name: InC
|
||||
Details: The Inform-tools extension to the C programming language
|
||||
Extension: .c
|
||||
Name: "InC"
|
||||
Details: "The Inform-tools extension to the C programming language"
|
||||
Extension: ".c"
|
||||
Supports Namespaces: true
|
||||
Multiline Comment Open: /*
|
||||
Multiline Comment Close: */
|
||||
Line Comment: //
|
||||
Multiline Comment Open: "/*"
|
||||
Multiline Comment Close: "*/"
|
||||
Line Comment: "//"
|
||||
String Literal: "\""
|
||||
String Literal Escape: \
|
||||
Character Literal: '
|
||||
Character Literal Escape: \
|
||||
String Literal Escape: "\\"
|
||||
Character Literal: "'"
|
||||
Character Literal Escape: "\\"
|
||||
C-Like: true
|
||||
|
||||
# C does in fact support octal literals, marking them as starting with an
|
||||
|
@ -17,30 +17,28 @@ C-Like: true
|
|||
# rejected by the C standards body as useless, but are so useful that gcc
|
||||
# and clang support them anyway.
|
||||
|
||||
Hexadecimal Literal Prefix: 0x
|
||||
Binary Literal Prefix: 0b
|
||||
Negative Literal Prefix: -
|
||||
Hexadecimal Literal Prefix: "0x"
|
||||
Binary Literal Prefix: "0b"
|
||||
Negative Literal Prefix: "-"
|
||||
|
||||
# The "shebang" routine for a language is called to add anything it wants to
|
||||
# at the very top of the tangled code. (For a scripting language such as
|
||||
# Perl or Python, that might be a shebang: hence the name.)
|
||||
# Perl or Python, that might be a shebang: hence the name.)
|
||||
# But we will use it to define the constant PLATFORM_POSIX everywhere except
|
||||
# Windows. This needs to happen right at the top, because the "very early
|
||||
# code" in a tangle may contain material conditional on whether it is defined.
|
||||
|
||||
Shebang: #ifndef PLATFORM_WINDOWS\n#define PLATFORM_POSIX\n#endif\n
|
||||
|
||||
Before Named Paragraph Expansion: \n{\n
|
||||
After Named Paragraph Expansion: }\n
|
||||
Start Ifdef: #ifdef %S\n
|
||||
End Ifdef: #endif /* %S */\n
|
||||
Start Ifndef: #ifndef %S\n
|
||||
End Ifndef: #endif /* %S */\n
|
||||
Shebang: "#ifndef PLATFORM_WINDOWS\n#define PLATFORM_POSIX\n#endif\n"
|
||||
Before Named Paragraph Expansion: "\n{\n"
|
||||
After Named Paragraph Expansion: "}\n"
|
||||
Start Ifdef: "#ifdef %S\n"
|
||||
End Ifdef: "#endif /* %S */\n"
|
||||
Start Ifndef: "#ifndef %S\n"
|
||||
End Ifndef: "#endif /* %S */\n"
|
||||
Line Marker: "#line %d \"%f\"\n"
|
||||
|
||||
Start Definition: #define %S\s
|
||||
Prolong Definition: \\\n\s\s\s\s
|
||||
End Definition: \n
|
||||
Start Definition: "#define %S\s"
|
||||
Prolong Definition: "\\\n\s\s\s\s"
|
||||
End Definition: "\n"
|
||||
|
||||
# FILE gets in even though it's not technically reserved but only a type
|
||||
# name, defined in the standard C library.
|
||||
|
|
|
@ -1,22 +1,22 @@
|
|||
Name: Inform 6
|
||||
Details: The C-like interactive fiction language Inform 6
|
||||
Extension: .i6
|
||||
Line Comment: !
|
||||
Name: "Inform 6"
|
||||
Details: "The C-like interactive fiction language Inform 6"
|
||||
Extension: ".i6"
|
||||
Line Comment: "!"
|
||||
String Literal: "\""
|
||||
String Literal Escape: \
|
||||
Character Literal: '
|
||||
Character Literal Escape: \
|
||||
Binary Literal Prefix: $$
|
||||
Hexadecimal Literal Prefix: $
|
||||
Negative Literal Prefix: -
|
||||
String Literal Escape: "\\"
|
||||
Character Literal: "'"
|
||||
Character Literal Escape: "\\"
|
||||
Binary Literal Prefix: "$$"
|
||||
Hexadecimal Literal Prefix: "$"
|
||||
Negative Literal Prefix: "-"
|
||||
|
||||
Start Definition: Constant %S =\s
|
||||
End Definition: ;\n
|
||||
Start Definition: "Constant %S =\s"
|
||||
End Definition: ";\n"
|
||||
|
||||
Start Ifdef: #ifdef %S;\n
|
||||
End Ifdef: #endif; ! %S\n
|
||||
Start Ifndef: #ifndef %S;\n
|
||||
End Ifndef: #endif; ! %S\n
|
||||
Start Ifdef: "#ifdef %S;\n"
|
||||
End Ifdef: "#endif; ! %S\n"
|
||||
Start Ifndef: "#ifndef %S;\n"
|
||||
End Ifndef: "#endif; ! %S\n"
|
||||
|
||||
# Reserved words:
|
||||
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
Name: Inform 7
|
||||
Details: The natural-language based language Inform 7
|
||||
Extension: .i7x
|
||||
Multiline Comment Open: [
|
||||
Multiline Comment Close: ]
|
||||
Name: "Inform 7"
|
||||
Details: "The natural-language based language Inform 7"
|
||||
Extension: ".i7x"
|
||||
Multiline Comment Open: "["
|
||||
Multiline Comment Close: "]"
|
||||
String Literal: "\""
|
||||
|
||||
# This is here so that tangling the Standard Rules extension doesn't insert
|
||||
|
|
|
@ -1,3 +1,3 @@
|
|||
Name: None
|
||||
Details: For programs in languages not yet supported by Inweb
|
||||
Extension: .txt
|
||||
Name: "None"
|
||||
Details: "For programs in languages not yet supported by Inweb"
|
||||
Extension: ".txt"
|
||||
|
|
|
@ -1,17 +1,17 @@
|
|||
Name: Perl
|
||||
Details: The scripting language Perl 5
|
||||
Extension: .pl
|
||||
Line Comment: #
|
||||
Name: "Perl"
|
||||
Details: "The scripting language Perl 5"
|
||||
Extension: ".pl"
|
||||
Line Comment: "#"
|
||||
String Literal: "\""
|
||||
String Literal Escape: \
|
||||
Character Literal: '
|
||||
Character Literal Escape: \
|
||||
String Literal Escape: "\\"
|
||||
Character Literal: "'"
|
||||
Character Literal Escape: "\\"
|
||||
|
||||
Shebang: #!/usr/bin/perl\n\n
|
||||
Before Named Paragraph Expansion: \n{\n
|
||||
After Named Paragraph Expansion: }\n
|
||||
Start Definition: %S =
|
||||
End Definition: \n;\n
|
||||
Shebang: "#!/usr/bin/perl\n\n"
|
||||
Before Named Paragraph Expansion: "\n{\n"
|
||||
After Named Paragraph Expansion: "}\n"
|
||||
Start Definition: "%S ="
|
||||
End Definition: "\n;\n"
|
||||
|
||||
# In its usual zany way, Perl recognises the same #line syntax as C, thus in
|
||||
# principle overloading its comment notation #:
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
Name: Plain Text
|
||||
Details: For text files which are not programs
|
||||
Extension: .txt
|
||||
Name: "Plain Text"
|
||||
Details: "For text files which are not programs"
|
||||
Extension: ".txt"
|
||||
|
||||
colouring {
|
||||
=> !plain
|
||||
|
|
|
@ -46,7 +46,9 @@ This section of the manual is about how to do it.
|
|||
|
||||
Once you have written a definition, use |-read-language L| at the command
|
||||
line, where |L| is the file defining it. If you have many custom languages,
|
||||
|-read-languages D| reads all of the definitions in a directory |D|.
|
||||
|-read-languages D| reads all of the definitions in a directory |D|. Or, if
|
||||
the language in question is really quite specific to a single web, you can
|
||||
make a |Private Languages| subdirectory of the web and put it in there.
|
||||
|
||||
@h Structure of language definitions.
|
||||
Each language is defined by a single ILDF file. ("Inweb Language Definition
|
||||
|
@ -57,7 +59,7 @@ trailing whitespace on each line is ignored; blank lines are ignored; and
|
|||
so are comments, which are lines beginning with a |#| character.
|
||||
|
||||
The ILD contains three sorts of thing:
|
||||
(a) Properties, set by lines in the form |Name: C++|.
|
||||
(a) Properties, set by lines in the form |Name: "C++"|.
|
||||
(b) Keywords, set by lines in the form |keyword int|.
|
||||
(c) A colouring program, introduced by |colouring {| and continuing until the
|
||||
last block of it is closed with a |}|.
|
||||
|
@ -66,15 +68,15 @@ Everything in an ILD is optional, so a minimal ILD is in principle empty. In
|
|||
practice, though, every ILD should open like so:
|
||||
|
||||
= (sample ILDF code)
|
||||
Name: C
|
||||
Details: The C programming language
|
||||
Extension: .c
|
||||
Name: "C"
|
||||
Details: "The C programming language"
|
||||
Extension: ".c"
|
||||
|
||||
@h Properties.
|
||||
Inevitably, there's a miscellaneous shopping list of these, but let's start
|
||||
with the semi-compulsory ones.
|
||||
|
||||
|Name|. This is the one used by webs in their |Language: X| lines, and should
|
||||
|Name|. This is the one used by webs in their |Language: "X"| lines, and should
|
||||
match the ILD's own filename: wherever it is stored, the ILD for language |X|
|
||||
should be filenamed |X.ildf|.
|
||||
|
||||
|
@ -102,9 +104,9 @@ as a pair or not at all, is the notation for multiline comments.
|
|||
For example, C defines:
|
||||
|
||||
= (sample ILDF code)
|
||||
Multiline Comment Open: /*
|
||||
Multiline Comment Close: */
|
||||
Line Comment: //
|
||||
Multiline Comment Open: "/*"
|
||||
Multiline Comment Close: "*/"
|
||||
Line Comment: "//"
|
||||
|
||||
@ As noted, comments occur only outside of string or character literals. We
|
||||
can give notations for these as follows:
|
||||
|
@ -121,9 +123,9 @@ Here, C defines:
|
|||
|
||||
= (sample ILDF code)
|
||||
String Literal: "\""
|
||||
String Literal Escape: \
|
||||
Character Literal: '
|
||||
Character Literal Escape: \
|
||||
String Literal Escape: "\\"
|
||||
Character Literal: "'"
|
||||
Character Literal Escape: "\\"
|
||||
|
||||
@ Next, numeric literals, like |0xFE45| in C, or |$$10011110| in Inform 6.
|
||||
It's assumed that every language allows non-negative decimal numbers.
|
||||
|
@ -136,16 +138,16 @@ are notations for non-decimal numbers, if they exist.
|
|||
Here, C has:
|
||||
|
||||
= (sample ILDF code)
|
||||
Hexadecimal Literal Prefix: 0x
|
||||
Binary Literal Prefix: 0b
|
||||
Negative Literal Prefix: -
|
||||
Hexadecimal Literal Prefix: "0x"
|
||||
Binary Literal Prefix: "0b"
|
||||
Negative Literal Prefix: "-"
|
||||
|
||||
@ |Shebang| is used only in tangling, and is a (probably short) text added at
|
||||
the very beginning of a tangled program. This is useful for scripting languages
|
||||
in Unix, where the opening line must be a "shebang" indicating their language.
|
||||
For example, Perl defines:
|
||||
= (sample ILDF code)
|
||||
Shebang: #!/usr/bin/perl\n\n
|
||||
Shebang: "#!/usr/bin/perl\n\n"
|
||||
=
|
||||
Most languages do not have a shebang.
|
||||
|
||||
|
@ -170,8 +172,8 @@ matter added. This material is in |Before Named Paragraph Expansion| and
|
|||
|
||||
For C and all similar languages, we recommend this:
|
||||
= (sample ILDF code)
|
||||
Before Named Paragraph Expansion: \n{\n
|
||||
After Named Paragraph Expansion: }\n
|
||||
Before Named Paragraph Expansion: "\n{\n"
|
||||
After Named Paragraph Expansion: "}\n"
|
||||
=
|
||||
The effect of this is to ensure that code such as:
|
||||
= (not code)
|
||||
|
@ -200,12 +202,12 @@ It can only do so if the language provides a notation for that.
|
|||
continue a multiline definition (if they are allowed); and |End Definition|,
|
||||
if given, places any ending notation. For example, Inform 6 defines:
|
||||
= (sample ILDF code)
|
||||
Start Definition: Constant %S =\s
|
||||
End Definition: ;\n
|
||||
Start Definition: "Constant %S =\s"
|
||||
End Definition: ";\n"
|
||||
=
|
||||
where |%S| expands to the name of the term to be defined. Thus, we might tangle
|
||||
out to:
|
||||
= (sample ILDF code)
|
||||
= (not code)
|
||||
Constant TAXICAB = 1729;\n
|
||||
=
|
||||
Inweb ignores all definitions unless one of these three properties is given.
|
||||
|
@ -216,10 +218,10 @@ makes use of this to handle code dependent on the operating system in use.
|
|||
If the language supports it, the notation is in |Start Ifdef| and |End Ifdef|,
|
||||
and in |Start Ifndef| and |End Ifndef|. For example, Inform 6 has:
|
||||
= (sample ILDF code)
|
||||
Start Ifdef: #ifdef %S;\n
|
||||
End Ifdef: #endif; ! %S\n
|
||||
Start Ifndef: #ifndef %S;\n
|
||||
End Ifndef: #endif; ! %S\n
|
||||
Start Ifdef: "#ifdef %S;\n"
|
||||
End Ifdef: "#endif; ! %S\n"
|
||||
Start Ifndef: "#ifndef %S;\n"
|
||||
End Ifndef: "#endif; ! %S\n"
|
||||
=
|
||||
which is a subtly different notation from the C one. Again, |%S| expands to
|
||||
the name of the term we are conditionally compiling on.
|
||||
|
@ -310,88 +312,215 @@ block, that's a line of source code. Blocks normally contain one or more
|
|||
"rules":
|
||||
= (sample ILDF code)
|
||||
colouring {
|
||||
marble => !extract
|
||||
marble => !function
|
||||
}
|
||||
=
|
||||
Rules take the form of "if X, then Y", and the |=>| divides the X from the Y.
|
||||
This one says that if the snippet consists of the word "marble", then colour
|
||||
it |!extract|. Of course this is not very useful, since it would only catch
|
||||
it |!function|. Of course this is not very useful, since it would only catch
|
||||
lines containing only that one word. So we really want to narrow in on smaller
|
||||
snippets:
|
||||
snippets. This, for example, applies its rule to each individual character
|
||||
in turn:
|
||||
= (sample ILDF code)
|
||||
colouring {
|
||||
characters {
|
||||
X => !extract
|
||||
K => !identifier
|
||||
}
|
||||
}
|
||||
=
|
||||
The effect of the |characters {| ... |}| block is to apply all its rules to
|
||||
each character of the snippet owning it. Inside the block, then, the snippet
|
||||
is always just a single character, and our rule tells us to paint the letter K
|
||||
wherever it occurs.
|
||||
|
||||
@ The block |instances of X| narrows in on each usage of the text |X| inside
|
||||
@ In the above examples, |K| and |marble| appeared without quotation marks,
|
||||
but they were only allowed to do that because (a) they were single words,
|
||||
(b) those words had no other meaning, and (c) they didn't contain any
|
||||
awkward characters. For any more complicated texts, always use quotation
|
||||
marks. For example, in
|
||||
= (sample ILDF code)
|
||||
"=>" => !reserved
|
||||
=
|
||||
the |=>| in quotes is just text, whereas the one outside quotes is being
|
||||
used to divide a rule.
|
||||
|
||||
If you need a literal double quote inside the double-quotes, use |\"|; and
|
||||
use |\\| for a literal backslash. For example:
|
||||
= (sample ILDF code)
|
||||
"\\\"" => !reserved
|
||||
=
|
||||
actually matches the text |\"|.
|
||||
|
||||
@h The six splits.
|
||||
|characters| is an example of a "split", which splits up the original snippet
|
||||
of text -- say, the line |let K = 2| -- into smaller, non-overlapping snippets
|
||||
-- in this case, nine of them: |l|, |e|, |t|, | |, |K|, | |, |=|, | |, and |2|.
|
||||
Every split is followed by a block of rules, which is applied to each of the
|
||||
pieces in turn. Inweb works sideways-first: thus, if the block contains rules
|
||||
R1, R2, ..., then R1 is applied to each piece first, then R2 to each piece,
|
||||
and so on.
|
||||
|
||||
There are several different ways to split, all of them written in the
|
||||
plural, to emphasize that they work on what are usually multiple things.
|
||||
Rules, on the other hand, are written in the singular. Splits are not allowed
|
||||
to be followed by |=>|: they always begin a block.
|
||||
|
||||
1. |characters| splits the snippet into each of its characters.
|
||||
|
||||
2. |characters in T| splits the snippet into each of its characters which
|
||||
lie inside the text |T|. For example, here is a not very useful ILD for
|
||||
plain text in which all vowels are in red:
|
||||
|
||||
[[../Private Languages/VowelsExample.ildf as ILDF]]
|
||||
|
||||
Given the text:
|
||||
= (not code)
|
||||
A noir, E blanc, I rouge, U vert, O bleu : voyelles,
|
||||
Je dirai quelque jour vos naissances latentes :
|
||||
A, noir corset velu des mouches éclatantes
|
||||
Qui bombinent autour des puanteurs cruelles,
|
||||
=
|
||||
this produces:
|
||||
= (sample VowelsExample code)
|
||||
A noir, E blanc, I rouge, U vert, O bleu : voyelles,
|
||||
Je dirai quelque jour vos naissances latentes :
|
||||
A, noir corset velu des mouches éclatantes
|
||||
Qui bombinent autour des puanteurs cruelles,
|
||||
=
|
||||
|
||||
3. The split |instances of X| narrows in on each usage of the text |X| inside
|
||||
the snippet. For example,
|
||||
= (sample ILDF code)
|
||||
colouring {
|
||||
instances of == {
|
||||
=> !reserved
|
||||
}
|
||||
}
|
||||
[[../Private Languages/LineageExample.ildf as ILDF]]
|
||||
acts on the text:
|
||||
= (not code)
|
||||
Jacob first appears in the Book of Genesis, the son of Isaac and Rebecca, the
|
||||
grandson of Abraham, Sarah and Bethuel, the nephew of Ishmael.
|
||||
=
|
||||
gives every usage of |==| the colour |!reserved|. Note that it never runs in
|
||||
an overlapping way: the snippet |===| would be considered as having only one
|
||||
instance of |==| (the first two characters), while |====| would have two.
|
||||
to produce:
|
||||
= (sample LineageExample code)
|
||||
Jacob first appears in the Book of Genesis, the son of Isaac and Rebecca, the
|
||||
grandson of Abraham, Sarah and Bethuel, the nephew of Ishmael.
|
||||
=
|
||||
Note that it never runs in an overlapping way: the snippet |===| would be
|
||||
considered as having only one instance of |==| (the first two characters),
|
||||
while |====| would have two.
|
||||
|
||||
@ Another kind of block is |runs of C|, where |C| is a colour. For example:
|
||||
= (sample ILDF code)
|
||||
colouring {
|
||||
runs of !identifier {
|
||||
printf => !function
|
||||
sscanf => !function
|
||||
}
|
||||
}
|
||||
4. The split |runs of C|, where |C| describes a colour, splits the snippet
|
||||
into non-overlapping contiguous pieces which have that colour. For example:
|
||||
[[../Private Languages/RunningExample.ildf as ILDF]]
|
||||
acts on:
|
||||
= (not code)
|
||||
Napoleon Bonaparte (1769-1821) took 167 scientists to Egypt in 1798,
|
||||
who published their so-called Memoirs over the period 1798-1801.
|
||||
=
|
||||
If this runs on the line |if (x == 1) printf("Hello!");|, then the inner
|
||||
block will run three times: its snippet will be |if|, then |x|, then |printf|.
|
||||
The rules inside the block will take effect only on the third time, when it
|
||||
will paint the word |printf| in |!function| colour.
|
||||
to produce:
|
||||
= (sample RunningExample code)
|
||||
Napoleon Bonaparte (1769-1821) took 167 scientists to Egypt in 1798,
|
||||
who published their so-called Memoirs over the period 1798-1801.
|
||||
=
|
||||
Here the hyphens in number ranges have been coloured, but not the hyphen
|
||||
in "so-called".
|
||||
|
||||
A more computer-science sort of example would be:
|
||||
[[../Private Languages/StdioExample.ildf as ILDF]]
|
||||
which acts on:
|
||||
= (not code)
|
||||
if (x == 1) printf("Hello!");
|
||||
=
|
||||
to produce:
|
||||
= (sample StdioExample code)
|
||||
if (x == 1) printf("Hello!");
|
||||
=
|
||||
The split divides the line up into three runs, and the inner block runs three
|
||||
times: on |if|, then |x|, then |printf|. Only the third time has any effect.
|
||||
|
||||
As a special form, |runs of unquoted| means "runs of characters not painted
|
||||
either with |!string| or |!character|". This is special because |unquoted| is
|
||||
not a colour.
|
||||
|
||||
@ It remains to specify what rules can do. As noted, they take the form
|
||||
"if X, then Y". The following are the possibilities for X, the condition:
|
||||
5. The split |matches of /E/|, where |/E/| is a regular expression (see below),
|
||||
splits the snippet up into non-overlapping pieces which match it: possibly
|
||||
none at all, of course, in which case the block of rules is never used.
|
||||
This is easier to demonstrate than explain:
|
||||
[[../Private Languages/AssemblageExample.ildf as ILDF]]
|
||||
which acts on:
|
||||
= (not code)
|
||||
JSR .initialise
|
||||
LDR A, #.data
|
||||
RTS
|
||||
.initialise
|
||||
TAX
|
||||
=
|
||||
to produce:
|
||||
= (sample AssemblageExample code)
|
||||
JSR .initialise
|
||||
LDR A, #.data
|
||||
RTS
|
||||
.initialise
|
||||
TAX
|
||||
=
|
||||
|
||||
1. X can be omitted altogether, and then the rule always applies. For example,
|
||||
this somewhat nihilistic program gets rid of colouring entirely:
|
||||
6. Lastly, the split |brackets in /E/| matches the snippet against the
|
||||
regular expression |E|, and then runs the rules on each bracketed
|
||||
subexpression in turn. (If there is no match, or there are no bracketed
|
||||
terms in |E|, nothing happens.)
|
||||
[[../Private Languages/EquationsExample.ildf as ILDF]]
|
||||
acts on:
|
||||
= (not code)
|
||||
A = 2716
|
||||
B=3
|
||||
C =715 + B
|
||||
D < 14
|
||||
=
|
||||
to produce:
|
||||
= (sample EquationsExample code)
|
||||
A = 2716
|
||||
B=3
|
||||
C =715 + B
|
||||
D < 14
|
||||
=
|
||||
What happens here is that the expression has two bracketed terms, one for
|
||||
the letter, one for the number; the rule is run first on the letter, then
|
||||
on the number, and both are turned to |!function|.
|
||||
|
||||
@h The seven ways rules can apply.
|
||||
Rules are the lines with a |=>| in. As noted, they take the form "if X, then
|
||||
Y". The following are the possibilities for X, the condition.
|
||||
|
||||
1. The easiest thing is to give nothing at all, and then the rule always
|
||||
applies. For example, this somewhat nihilistic program gets rid of colouring
|
||||
entirely:
|
||||
= (sample ILDF code)
|
||||
colouring {
|
||||
=> !plain
|
||||
}
|
||||
=
|
||||
2. X can require the whole snippet to be of a particular colour, by writing
|
||||
|colour C|. For example:
|
||||
|
||||
2. If X is a piece of literal text, the rule applies when the snippet is
|
||||
exactly that text. For example,
|
||||
= (sample ILDF code)
|
||||
printf => !function
|
||||
=
|
||||
|
||||
3. X can require the whole snippet to be of a particular colour, by writing
|
||||
|coloured C|. For example:
|
||||
= (sample ILDF code)
|
||||
colouring {
|
||||
characters {
|
||||
colour !character => !plain
|
||||
coloured !character => !plain
|
||||
}
|
||||
}
|
||||
=
|
||||
removes the syntax colouring on character literals.
|
||||
|
||||
3. X can require the snippet to be one of the language's known keywords, as
|
||||
4. X can require the snippet to be one of the language's known keywords, as
|
||||
declared earlier in the ILD by a |keyword| command. The syntax here is
|
||||
|keyword of C|, where |C| is a colour. For example:
|
||||
= (sample ILDF code)
|
||||
keyword of !element => !element
|
||||
=
|
||||
says: if the snippet is a keyword declared as being of colour |!element|,
|
||||
then actually colour it that way.
|
||||
then actually colour it that way. (This is much faster than making many
|
||||
comparison rules in a row, one for each keyword in the language; Inweb has
|
||||
put all of the registered keywords into a hash table for rapid lookup.)
|
||||
|
||||
4. X can look at a little context before or after the snippet, testing it
|
||||
5. X can look at a little context before or after the snippet, testing it
|
||||
with one of the following: |prefix P|, |spaced prefix P|,
|
||||
|optionally spaced prefix P|. These qualifiers have to do with whether white
|
||||
space must appear after |P| and before the snippet. For example,
|
||||
|
@ -403,12 +532,69 @@ space must appear after |P| and before the snippet. For example,
|
|||
means that any identifier occurring after a |->| token will be coloured
|
||||
as |!element|. Similarly for |suffix|.
|
||||
|
||||
5. And otherwise X is literal text, and the rule applies if and only if
|
||||
the snippet is exactly that text. For example,
|
||||
6. X can test the snippet against a regular expression, with |matching /E/|.
|
||||
For example:
|
||||
= (sample ILDF code)
|
||||
printf => !function
|
||||
runs of !identifier {
|
||||
matching /.*x.*/ => !element
|
||||
}
|
||||
=
|
||||
...turns any identifier containing a lower-case |x| into |!element| colour.
|
||||
Note that |matching /x/| would not have worked, because our regular expression
|
||||
is required to match the entire snippet, not just somewhere inside.
|
||||
= (sample ILDF code)
|
||||
characters in "0123456789" {
|
||||
matching /\d\d\d\d/ => !element
|
||||
}
|
||||
=
|
||||
...colours all four-digit numbers, but no others.
|
||||
|
||||
@ Now let's look at the conclusion Y of a rule. Here the possibilities are
|
||||
7. Whenever a split takes place, Inweb keeps count of how many pieces there are,
|
||||
and different rules can apply to differently numbered pieces. The notation
|
||||
is |number N|, where |N| is the number, counting from 1. For example,
|
||||
[[../Private Languages/ThirdExample.ildf as ILDF]]
|
||||
acts on:
|
||||
= (not code)
|
||||
With how sad steps, O Moon, thou climb'st the skies!
|
||||
How silently, and with how wan a face!
|
||||
What, may it be that even in heav'nly place
|
||||
That busy archer his sharp arrows tries!
|
||||
Sure, if that long-with love-acquainted eyes
|
||||
Can judge of love, thou feel'st a lover's case,
|
||||
I read it in thy looks; thy languish'd grace
|
||||
To me, that feel the like, thy state descries.
|
||||
Then, ev'n of fellowship, O Moon, tell me,
|
||||
Is constant love deem'd there but want of wit?
|
||||
Are beauties there as proud as here they be?
|
||||
Do they above love to be lov'd, and yet
|
||||
Those lovers scorn whom that love doth possess?
|
||||
Do they call virtue there ungratefulness?
|
||||
=
|
||||
to produce:
|
||||
= (sample ThirdExample code)
|
||||
With how sad steps, O Moon, thou climb'st the skies!
|
||||
How silently, and with how wan a face!
|
||||
What, may it be that even in heav'nly place
|
||||
That busy archer his sharp arrows tries!
|
||||
Sure, if that long-with love-acquainted eyes
|
||||
Can judge of love, thou feel'st a lover's case,
|
||||
I read it in thy looks; thy languish'd grace
|
||||
To me, that feel the like, thy state descries.
|
||||
Then, ev'n of fellowship, O Moon, tell me,
|
||||
Is constant love deem'd there but want of wit?
|
||||
Are beauties there as proud as here they be?
|
||||
Do they above love to be lov'd, and yet
|
||||
Those lovers scorn whom that love doth possess?
|
||||
Do they call virtue there ungratefulness?
|
||||
=
|
||||
|
||||
@ Any condition can be reversed by preceding it with |not|. For example,
|
||||
= (sample ILDF code)
|
||||
not coloured !string => !plain
|
||||
=
|
||||
|
||||
@h The three ways rules can take effect.
|
||||
Now let's look at the conclusion Y of a rule. Here the possibilities are
|
||||
simpler:
|
||||
|
||||
1. If Y is the name of a colour, the snippet is painted in that colour.
|
||||
|
@ -429,17 +615,17 @@ rules (see above), it can also be applied to the prefix or suffix: use
|
|||
the notation |=> C on both| or |=> C on suffix| or |=> C on prefix|.
|
||||
|
||||
3. If Y is the word |debug|, then the current snippet and its colouring
|
||||
are printed out on the command line.
|
||||
|
||||
@ The syntax of ILDs tends to avoid superfluous quotation marks as confusing,
|
||||
but sometimes you need to be pedantic. If you want to match the text |=>|,
|
||||
for example, that could lead to ambiguity with the rule marker |=>|. For
|
||||
such occasions, simply put the text in double quotes, and change any literal
|
||||
double quote in it to |\"|, and use |\\| for a literal backslash. For example:
|
||||
are printed out on the command line. Thus:
|
||||
= (sample ILDF code)
|
||||
"keyword" => !reserved
|
||||
colouring {
|
||||
matches of /\d\S+/ {
|
||||
=> debug
|
||||
}
|
||||
}
|
||||
=
|
||||
The rule |=> debug| is unconditional, and will print whenever it's reached.
|
||||
|
||||
@h Example.
|
||||
@h The worm, Ouroboros.
|
||||
Inweb Language Definition Format is a kind of language in itself, and in
|
||||
fact Inweb is supplied with an ILD for ILDF itself, which Inweb used to
|
||||
syntax-colour the examples above. Here it is, as syntax-coloured by itself:
|
||||
|
|
6
Private Languages/AssemblageExample.ildf
Normal file
6
Private Languages/AssemblageExample.ildf
Normal file
|
@ -0,0 +1,6 @@
|
|||
Name: "AssemblageExample"
|
||||
colouring {
|
||||
matches of /\.[A-Za-z_][A-Za-z_0-9]*/ {
|
||||
=> !function
|
||||
}
|
||||
}
|
7
Private Languages/EquationsExample.ildf
Normal file
7
Private Languages/EquationsExample.ildf
Normal file
|
@ -0,0 +1,7 @@
|
|||
Name: "EquationsExample"
|
||||
colouring {
|
||||
=> !plain
|
||||
brackets in /.*?([A-Z])\s*=\s*(\d+).*/ {
|
||||
=> !function
|
||||
}
|
||||
}
|
7
Private Languages/LineageExample.ildf
Normal file
7
Private Languages/LineageExample.ildf
Normal file
|
@ -0,0 +1,7 @@
|
|||
Name: "LineageExample"
|
||||
colouring {
|
||||
=> !plain
|
||||
instances of "son" {
|
||||
=> !function
|
||||
}
|
||||
}
|
10
Private Languages/RunningExample.ildf
Normal file
10
Private Languages/RunningExample.ildf
Normal file
|
@ -0,0 +1,10 @@
|
|||
Name: "RunningExample"
|
||||
colouring {
|
||||
=> !plain
|
||||
characters in "0123456789" {
|
||||
=> !function
|
||||
}
|
||||
runs of !plain {
|
||||
"-" => !function
|
||||
}
|
||||
}
|
7
Private Languages/StdioExample.ildf
Normal file
7
Private Languages/StdioExample.ildf
Normal file
|
@ -0,0 +1,7 @@
|
|||
Name: "StdioExample"
|
||||
colouring {
|
||||
runs of !identifier {
|
||||
printf => !function
|
||||
sscanf => !function
|
||||
}
|
||||
}
|
7
Private Languages/ThirdExample.ildf
Normal file
7
Private Languages/ThirdExample.ildf
Normal file
|
@ -0,0 +1,7 @@
|
|||
Name: "ThirdExample"
|
||||
colouring {
|
||||
=> !plain
|
||||
matches of /\S+/ {
|
||||
number 3 => !function
|
||||
}
|
||||
}
|
7
Private Languages/VowelsExample.ildf
Normal file
7
Private Languages/VowelsExample.ildf
Normal file
|
@ -0,0 +1,7 @@
|
|||
Name: "VowelsExample"
|
||||
colouring {
|
||||
=> !plain
|
||||
characters in "AEIOUaeiou" {
|
||||
=> !function
|
||||
}
|
||||
}
|
1397
Tangled/inweb.c
1397
Tangled/inweb.c
File diff suppressed because it is too large
Load diff
|
@ -469,8 +469,9 @@ says <code class="display"><span class="extract">q</span></code>, the only match
|
|||
</li><li>(e) <code class="display"><span class="extract">%i</span></code> means any character from the identifier class (see above);
|
||||
</li><li>(f) <code class="display"><span class="extract">%p</span></code> means any character which can be used in the name of a Preform
|
||||
nonterminal, which is to say, an identifier character or a hyphen;
|
||||
</li><li>(g) <code class="display"><span class="extract">%P</span></code> means the same or else a colon.
|
||||
</li><li>(h) <code class="display"><span class="extract">%t</span></code> means a tab.
|
||||
</li><li>(g) <code class="display"><span class="extract">%P</span></code> means the same or else a colon;
|
||||
</li><li>(h) <code class="display"><span class="extract">%t</span></code> means a tab;
|
||||
</li><li>(i) <code class="display"><span class="extract">%q</span></code> means a double-quote.
|
||||
</li></ul>
|
||||
<p class="inwebparagraph"><code class="display"><span class="extract">%</span></code> otherwise makes a literal escape; a space means any whitespace character;
|
||||
square brackets enclose literal alternatives, and note as usual with grep
|
||||
|
|
|
@ -524,6 +524,7 @@ little context before it (where available).
|
|||
<span class="definitionkeyword">define</span> <span class="constant">UNSPACED_RULE_SUFFIX</span><span class="plain"> </span><span class="constant">5</span><span class="plain"> </span><span class="comment">for <code class="display"><span class="extract">suffix P</span></code></span>
|
||||
<span class="definitionkeyword">define</span> <span class="constant">SPACED_RULE_SUFFIX</span><span class="plain"> </span><span class="constant">6</span><span class="plain"> </span><span class="comment">for <code class="display"><span class="extract">spaced suffix P</span></code></span>
|
||||
<span class="definitionkeyword">define</span> <span class="constant">OPTIONALLY_SPACED_RULE_SUFFIX</span><span class="plain"> </span><span class="constant">7</span><span class="plain"> </span><span class="comment">for <code class="display"><span class="extract">optionally spaced suffix P</span></code></span>
|
||||
<span class="definitionkeyword">define</span> <span class="constant">MAX_ILDF_REGEXP_LENGTH</span><span class="plain"> </span><span class="constant">64</span>
|
||||
</pre>
|
||||
|
||||
<pre class="display">
|
||||
|
@ -533,6 +534,7 @@ little context before it (where available).
|
|||
<span class="reserved">int</span><span class="plain"> </span><span class="identifier">match_keyword_of_colour</span><span class="plain">; </span><span class="comment">for <code class="display"><span class="extract">keyword C</span></code>, or else <code class="display"><span class="extract">NOT_A_COLOUR</span></code></span>
|
||||
<span class="reserved">struct</span><span class="plain"> </span><span class="reserved">text_stream</span><span class="plain"> *</span><span class="identifier">match_text</span><span class="plain">; </span><span class="comment">or length 0 to mean "anything"</span>
|
||||
<span class="reserved">int</span><span class="plain"> </span><span class="identifier">match_prefix</span><span class="plain">; </span><span class="comment">one of the <code class="display"><span class="extract">*_RULE_PREFIX</span></code> values above</span>
|
||||
<span class="identifier">wchar_t</span><span class="plain"> </span><span class="identifier">match_regexp_text</span><span class="plain">[</span><span class="constant">MAX_ILDF_REGEXP_LENGTH</span><span class="plain">];</span>
|
||||
|
||||
<span class="comment">the conclusion:</span>
|
||||
<span class="reserved">struct</span><span class="plain"> </span><span class="reserved">colouring_language_block</span><span class="plain"> *</span><span class="identifier">execute_block</span><span class="plain">; </span><span class="comment">or <code class="display"><span class="extract">NULL</span></code>, in which case...</span>
|
||||
|
@ -542,6 +544,7 @@ little context before it (where available).
|
|||
|
||||
<span class="comment">workspace during painting</span>
|
||||
<span class="reserved">int</span><span class="plain"> </span><span class="identifier">fix_position</span><span class="plain">; </span><span class="comment">where the prefix or suffix started</span>
|
||||
<span class="reserved">struct</span><span class="plain"> </span><span class="reserved">match_results</span><span class="plain"> </span><span class="identifier">mr</span><span class="plain">; </span><span class="comment">of a regular expression</span>
|
||||
<span class="constant">MEMORY_MANAGEMENT</span>
|
||||
<span class="plain">} </span><span class="reserved">colouring_rule</span><span class="plain">;</span>
|
||||
</pre>
|
||||
|
@ -562,11 +565,15 @@ little context before it (where available).
|
|||
<span class="identifier">rule</span><span class="plain">-></span><span class="element">match_text</span><span class="plain"> = </span><span class="identifier">NULL</span><span class="plain">;</span>
|
||||
<span class="identifier">rule</span><span class="plain">-></span><span class="element">match_prefix</span><span class="plain"> = </span><span class="constant">NOT_A_RULE_PREFIX</span><span class="plain">;</span>
|
||||
<span class="identifier">rule</span><span class="plain">-></span><span class="element">match_keyword_of_colour</span><span class="plain"> = </span><span class="constant">NOT_A_COLOUR</span><span class="plain">;</span>
|
||||
<span class="identifier">rule</span><span class="plain">-></span><span class="element">match_regexp_text</span><span class="plain">[0] = </span><span class="constant">0</span><span class="plain">;</span>
|
||||
|
||||
<span class="identifier">rule</span><span class="plain">-></span><span class="element">set_to_colour</span><span class="plain"> = </span><span class="constant">NOT_A_COLOUR</span><span class="plain">;</span>
|
||||
<span class="identifier">rule</span><span class="plain">-></span><span class="element">set_prefix_to_colour</span><span class="plain"> = </span><span class="constant">NOT_A_COLOUR</span><span class="plain">;</span>
|
||||
<span class="identifier">rule</span><span class="plain">-></span><span class="element">execute_block</span><span class="plain"> = </span><span class="identifier">NULL</span><span class="plain">;</span>
|
||||
<span class="identifier">rule</span><span class="plain">-></span><span class="element">debug</span><span class="plain"> = </span><span class="constant">FALSE</span><span class="plain">;</span>
|
||||
|
||||
<span class="identifier">rule</span><span class="plain">-></span><span class="element">fix_position</span><span class="plain"> = </span><span class="constant">0</span><span class="plain">;</span>
|
||||
<span class="identifier">rule</span><span class="plain">-></span><span class="element">mr</span><span class="plain"> = </span><span class="functiontext">Regexp::create_mr</span><span class="plain">();</span>
|
||||
<span class="reserved">return</span><span class="plain"> </span><span class="identifier">rule</span><span class="plain">;</span>
|
||||
<span class="plain">}</span>
|
||||
</pre>
|
||||
|
@ -607,6 +614,8 @@ little context before it (where available).
|
|||
<span class="plain">} </span><span class="reserved">else</span><span class="plain"> </span><span class="reserved">if</span><span class="plain"> (</span><span class="functiontext">Regexp::match</span><span class="plain">(&</span><span class="identifier">mr</span><span class="plain">, </span><span class="identifier">premiss</span><span class="plain">, </span><span class="identifier">L</span><span class="string">"prefix (%c+)"</span><span class="plain">)) {</span>
|
||||
<span class="identifier">rule</span><span class="plain">-></span><span class="element">match_prefix</span><span class="plain"> = </span><span class="constant">UNSPACED_RULE_PREFIX</span><span class="plain">;</span>
|
||||
<span class="identifier">rule</span><span class="plain">-></span><span class="element">match_text</span><span class="plain"> = </span><span class="functiontext">Languages::text</span><span class="plain">(</span><span class="identifier">mr</span><span class="plain">.</span><span class="element">exp</span><span class="plain">[0], </span><span class="identifier">tfp</span><span class="plain">, </span><span class="constant">FALSE</span><span class="plain">);</span>
|
||||
<span class="plain">} </span><span class="reserved">else</span><span class="plain"> </span><span class="reserved">if</span><span class="plain"> (</span><span class="functiontext">Regexp::match</span><span class="plain">(&</span><span class="identifier">mr</span><span class="plain">, </span><span class="identifier">premiss</span><span class="plain">, </span><span class="identifier">L</span><span class="string">"match (%c+)"</span><span class="plain">)) {</span>
|
||||
<span class="functiontext">Languages::regexp</span><span class="plain">(</span><span class="identifier">rule</span><span class="plain">-></span><span class="element">match_regexp_text</span><span class="plain">, </span><span class="identifier">mr</span><span class="plain">.</span><span class="element">exp</span><span class="plain">[0], </span><span class="identifier">tfp</span><span class="plain">);</span>
|
||||
<span class="plain">} </span><span class="reserved">else</span><span class="plain"> </span><span class="reserved">if</span><span class="plain"> (</span><span class="functiontext">Regexp::match</span><span class="plain">(&</span><span class="identifier">mr</span><span class="plain">, </span><span class="identifier">premiss</span><span class="plain">, </span><span class="identifier">L</span><span class="string">"spaced prefix (%c+)"</span><span class="plain">)) {</span>
|
||||
<span class="identifier">rule</span><span class="plain">-></span><span class="element">match_prefix</span><span class="plain"> = </span><span class="constant">SPACED_RULE_PREFIX</span><span class="plain">;</span>
|
||||
<span class="identifier">rule</span><span class="plain">-></span><span class="element">match_text</span><span class="plain"> = </span><span class="functiontext">Languages::text</span><span class="plain">(</span><span class="identifier">mr</span><span class="plain">.</span><span class="element">exp</span><span class="plain">[0], </span><span class="identifier">tfp</span><span class="plain">, </span><span class="constant">FALSE</span><span class="plain">);</span>
|
||||
|
@ -823,6 +832,81 @@ literal backslash.
|
|||
|
||||
<p class="endnote">The function Languages::text is used in <a href="#SP7_1">§7.1</a>, <a href="#SP7_2">§7.2</a>, <a href="#SP12_1">§12.1</a>.</p>
|
||||
|
||||
<p class="inwebparagraph"><a id="SP17"></a><b>§17. </b>And regular expressions.
|
||||
</p>
|
||||
|
||||
|
||||
<pre class="display">
|
||||
<span class="reserved">void</span><span class="plain"> </span><span class="functiontext">Languages::regexp</span><span class="plain">(</span><span class="identifier">wchar_t</span><span class="plain"> *</span><span class="identifier">write_to</span><span class="plain">, </span><span class="reserved">text_stream</span><span class="plain"> *</span><span class="identifier">T</span><span class="plain">, </span><span class="reserved">text_file_position</span><span class="plain"> *</span><span class="identifier">tfp</span><span class="plain">) {</span>
|
||||
<span class="reserved">if</span><span class="plain"> (</span><span class="identifier">write_to</span><span class="plain"> == </span><span class="identifier">NULL</span><span class="plain">) </span><span class="identifier">internal_error</span><span class="plain">(</span><span class="string">"no buffer"</span><span class="plain">);</span>
|
||||
<span class="identifier">write_to</span><span class="plain">[0] = </span><span class="constant">0</span><span class="plain">;</span>
|
||||
<span class="reserved">if</span><span class="plain"> (</span><span class="functiontext">Str::len</span><span class="plain">(</span><span class="identifier">T</span><span class="plain">) > </span><span class="constant">0</span><span class="plain">) {</span>
|
||||
<span class="reserved">int</span><span class="plain"> </span><span class="identifier">from</span><span class="plain"> = </span><span class="constant">0</span><span class="plain">, </span><span class="identifier">to</span><span class="plain"> = </span><span class="functiontext">Str::len</span><span class="plain">(</span><span class="identifier">T</span><span class="plain">)-1, </span><span class="identifier">x</span><span class="plain"> = </span><span class="constant">0</span><span class="plain">;</span>
|
||||
<span class="reserved">if</span><span class="plain"> ((</span><span class="identifier">to</span><span class="plain"> > </span><span class="identifier">from</span><span class="plain">) &&</span>
|
||||
<span class="plain">(</span><span class="functiontext">Str::get_at</span><span class="plain">(</span><span class="identifier">T</span><span class="plain">, </span><span class="identifier">from</span><span class="plain">) == </span><span class="character">'/'</span><span class="plain">) && (</span><span class="functiontext">Str::get_at</span><span class="plain">(</span><span class="identifier">T</span><span class="plain">, </span><span class="identifier">to</span><span class="plain">) == </span><span class="character">'/'</span><span class="plain">)) {</span>
|
||||
<span class="identifier">from</span><span class="plain">++; </span><span class="identifier">to</span><span class="plain">--;</span>
|
||||
<span class="reserved">for</span><span class="plain"> (</span><span class="reserved">int</span><span class="plain"> </span><span class="identifier">i</span><span class="plain">=</span><span class="identifier">from</span><span class="plain">; </span><span class="identifier">i</span><span class="plain"><=</span><span class="identifier">to</span><span class="plain">; </span><span class="identifier">i</span><span class="plain">++) {</span>
|
||||
<span class="identifier">wchar_t</span><span class="plain"> </span><span class="identifier">c</span><span class="plain"> = </span><span class="functiontext">Str::get_at</span><span class="plain">(</span><span class="identifier">T</span><span class="plain">, </span><span class="identifier">i</span><span class="plain">);</span>
|
||||
<span class="reserved">if</span><span class="plain"> (</span><span class="identifier">c</span><span class="plain"> == </span><span class="character">'\\'</span><span class="plain">) {</span>
|
||||
<span class="identifier">wchar_t</span><span class="plain"> </span><span class="identifier">w</span><span class="plain"> = </span><span class="functiontext">Str::get_at</span><span class="plain">(</span><span class="identifier">T</span><span class="plain">, </span><span class="identifier">i</span><span class="plain">+1);</span>
|
||||
<span class="reserved">if</span><span class="plain"> (</span><span class="identifier">w</span><span class="plain"> == </span><span class="character">'\\'</span><span class="plain">) {</span>
|
||||
<span class="identifier">x</span><span class="plain"> = </span><span class="functiontext">Languages::add_to_regexp</span><span class="plain">(</span><span class="identifier">write_to</span><span class="plain">, </span><span class="identifier">x</span><span class="plain">, </span><span class="identifier">w</span><span class="plain">);</span>
|
||||
<span class="plain">} </span><span class="reserved">else</span><span class="plain"> </span><span class="reserved">if</span><span class="plain"> (</span><span class="identifier">w</span><span class="plain"> == </span><span class="character">'d'</span><span class="plain">) {</span>
|
||||
<span class="identifier">x</span><span class="plain"> = </span><span class="functiontext">Languages::add_escape_to_regexp</span><span class="plain">(</span><span class="identifier">write_to</span><span class="plain">, </span><span class="identifier">x</span><span class="plain">, </span><span class="character">'d'</span><span class="plain">);</span>
|
||||
<span class="plain">} </span><span class="reserved">else</span><span class="plain"> </span><span class="reserved">if</span><span class="plain"> (</span><span class="identifier">w</span><span class="plain"> == </span><span class="character">'t'</span><span class="plain">) {</span>
|
||||
<span class="identifier">x</span><span class="plain"> = </span><span class="functiontext">Languages::add_escape_to_regexp</span><span class="plain">(</span><span class="identifier">write_to</span><span class="plain">, </span><span class="identifier">x</span><span class="plain">, </span><span class="character">'t'</span><span class="plain">);</span>
|
||||
<span class="plain">} </span><span class="reserved">else</span><span class="plain"> </span><span class="reserved">if</span><span class="plain"> (</span><span class="identifier">w</span><span class="plain"> == </span><span class="character">'s'</span><span class="plain">) {</span>
|
||||
<span class="identifier">x</span><span class="plain"> = </span><span class="functiontext">Languages::add_to_regexp</span><span class="plain">(</span><span class="identifier">write_to</span><span class="plain">, </span><span class="identifier">x</span><span class="plain">, </span><span class="character">' '</span><span class="plain">);</span>
|
||||
<span class="plain">} </span><span class="reserved">else</span><span class="plain"> </span><span class="reserved">if</span><span class="plain"> (</span><span class="identifier">w</span><span class="plain"> == </span><span class="character">'S'</span><span class="plain">) {</span>
|
||||
<span class="identifier">x</span><span class="plain"> = </span><span class="functiontext">Languages::add_escape_to_regexp</span><span class="plain">(</span><span class="identifier">write_to</span><span class="plain">, </span><span class="identifier">x</span><span class="plain">, </span><span class="character">'C'</span><span class="plain">);</span>
|
||||
<span class="plain">} </span><span class="reserved">else</span><span class="plain"> </span><span class="reserved">if</span><span class="plain"> (</span><span class="identifier">w</span><span class="plain"> == </span><span class="character">'"'</span><span class="plain">) {</span>
|
||||
<span class="identifier">x</span><span class="plain"> = </span><span class="functiontext">Languages::add_escape_to_regexp</span><span class="plain">(</span><span class="identifier">write_to</span><span class="plain">, </span><span class="identifier">x</span><span class="plain">, </span><span class="character">'q'</span><span class="plain">);</span>
|
||||
<span class="plain">} </span><span class="reserved">else</span><span class="plain"> {</span>
|
||||
<span class="identifier">x</span><span class="plain"> = </span><span class="functiontext">Languages::add_escape_to_regexp</span><span class="plain">(</span><span class="identifier">write_to</span><span class="plain">, </span><span class="identifier">x</span><span class="plain">, </span><span class="identifier">w</span><span class="plain">);</span>
|
||||
<span class="plain">}</span>
|
||||
<span class="identifier">i</span><span class="plain">++;</span>
|
||||
<span class="reserved">continue</span><span class="plain">;</span>
|
||||
<span class="plain">}</span>
|
||||
<span class="reserved">if</span><span class="plain"> (</span><span class="identifier">c</span><span class="plain"> == </span><span class="character">'.'</span><span class="plain">) {</span>
|
||||
<span class="identifier">x</span><span class="plain"> = </span><span class="functiontext">Languages::add_escape_to_regexp</span><span class="plain">(</span><span class="identifier">write_to</span><span class="plain">, </span><span class="identifier">x</span><span class="plain">, </span><span class="character">'c'</span><span class="plain">);</span>
|
||||
<span class="reserved">continue</span><span class="plain">;</span>
|
||||
<span class="plain">}</span>
|
||||
<span class="reserved">if</span><span class="plain"> (</span><span class="identifier">c</span><span class="plain"> == </span><span class="character">'%'</span><span class="plain">) {</span>
|
||||
<span class="identifier">x</span><span class="plain"> = </span><span class="functiontext">Languages::add_escape_to_regexp</span><span class="plain">(</span><span class="identifier">write_to</span><span class="plain">, </span><span class="identifier">x</span><span class="plain">, </span><span class="character">'%'</span><span class="plain">);</span>
|
||||
<span class="reserved">continue</span><span class="plain">;</span>
|
||||
<span class="plain">}</span>
|
||||
<span class="identifier">x</span><span class="plain"> = </span><span class="functiontext">Languages::add_to_regexp</span><span class="plain">(</span><span class="identifier">write_to</span><span class="plain">, </span><span class="identifier">x</span><span class="plain">, </span><span class="identifier">c</span><span class="plain">);</span>
|
||||
<span class="plain">}</span>
|
||||
<span class="plain">} </span><span class="reserved">else</span><span class="plain"> {</span>
|
||||
<span class="functiontext">Errors::in_text_file</span><span class="plain">(</span>
|
||||
<span class="string">"the expression to match must be in slashes '/'"</span><span class="plain">, </span><span class="identifier">tfp</span><span class="plain">);</span>
|
||||
<span class="plain">}</span>
|
||||
<span class="reserved">if</span><span class="plain"> (</span><span class="identifier">x</span><span class="plain"> >= </span><span class="constant">MAX_ILDF_REGEXP_LENGTH</span><span class="plain">)</span>
|
||||
<span class="functiontext">Errors::in_text_file</span><span class="plain">(</span>
|
||||
<span class="string">"the expression to match is too long"</span><span class="plain">, </span><span class="identifier">tfp</span><span class="plain">);</span>
|
||||
<span class="plain">}</span>
|
||||
<span class="plain">}</span>
|
||||
|
||||
<span class="reserved">int</span><span class="plain"> </span><span class="functiontext">Languages::add_to_regexp</span><span class="plain">(</span><span class="identifier">wchar_t</span><span class="plain"> *</span><span class="identifier">write_to</span><span class="plain">, </span><span class="reserved">int</span><span class="plain"> </span><span class="identifier">i</span><span class="plain">, </span><span class="identifier">wchar_t</span><span class="plain"> </span><span class="identifier">c</span><span class="plain">) {</span>
|
||||
<span class="reserved">if</span><span class="plain"> (</span><span class="identifier">i</span><span class="plain"> < </span><span class="constant">MAX_ILDF_REGEXP_LENGTH</span><span class="plain">) </span><span class="identifier">write_to</span><span class="plain">[</span><span class="identifier">i</span><span class="plain">++] = </span><span class="identifier">c</span><span class="plain">;</span>
|
||||
<span class="reserved">return</span><span class="plain"> </span><span class="identifier">i</span><span class="plain">;</span>
|
||||
<span class="plain">}</span>
|
||||
|
||||
<span class="reserved">int</span><span class="plain"> </span><span class="functiontext">Languages::add_escape_to_regexp</span><span class="plain">(</span><span class="identifier">wchar_t</span><span class="plain"> *</span><span class="identifier">write_to</span><span class="plain">, </span><span class="reserved">int</span><span class="plain"> </span><span class="identifier">i</span><span class="plain">, </span><span class="identifier">wchar_t</span><span class="plain"> </span><span class="identifier">c</span><span class="plain">) {</span>
|
||||
<span class="identifier">i</span><span class="plain"> = </span><span class="functiontext">Languages::add_to_regexp</span><span class="plain">(</span><span class="identifier">write_to</span><span class="plain">, </span><span class="identifier">i</span><span class="plain">, </span><span class="character">'%'</span><span class="plain">);</span>
|
||||
<span class="identifier">i</span><span class="plain"> = </span><span class="functiontext">Languages::add_to_regexp</span><span class="plain">(</span><span class="identifier">write_to</span><span class="plain">, </span><span class="identifier">i</span><span class="plain">, </span><span class="identifier">c</span><span class="plain">);</span>
|
||||
<span class="reserved">return</span><span class="plain"> </span><span class="identifier">i</span><span class="plain">;</span>
|
||||
<span class="plain">}</span>
|
||||
</pre>
|
||||
|
||||
<p class="inwebparagraph"></p>
|
||||
|
||||
<p class="endnote">The function Languages::regexp is used in <a href="#SP12_1">§12.1</a>.</p>
|
||||
|
||||
<p class="endnote">The function Languages::add_to_regexp appears nowhere else.</p>
|
||||
|
||||
<p class="endnote">The function Languages::add_escape_to_regexp appears nowhere else.</p>
|
||||
|
||||
<hr class="tocbar">
|
||||
<ul class="toc"><li><i>(This section begins Chapter 4: Languages.)</i></li><li><a href="4-lm.html">Continue with 'Language Methods'</a></li></ul><hr class="tocbar">
|
||||
<!--End of weave-->
|
||||
|
|
|
@ -378,7 +378,10 @@ rule across the whole snippet before moving on to the next.
|
|||
<pre class="display">
|
||||
<span class="reserved">int</span><span class="plain"> </span><span class="functiontext">Painter::satisfies</span><span class="plain">(</span><span class="reserved">hash_table</span><span class="plain"> *</span><span class="identifier">HT</span><span class="plain">, </span><span class="reserved">colouring_rule</span><span class="plain"> *</span><span class="identifier">rule</span><span class="plain">, </span><span class="reserved">text_stream</span><span class="plain"> *</span><span class="identifier">matter</span><span class="plain">,</span>
|
||||
<span class="reserved">text_stream</span><span class="plain"> *</span><span class="identifier">colouring</span><span class="plain">, </span><span class="reserved">int</span><span class="plain"> </span><span class="identifier">from</span><span class="plain">, </span><span class="reserved">int</span><span class="plain"> </span><span class="identifier">to</span><span class="plain">) {</span>
|
||||
<span class="reserved">if</span><span class="plain"> (</span><span class="functiontext">Str::len</span><span class="plain">(</span><span class="identifier">rule</span><span class="plain">-></span><span class="element">match_text</span><span class="plain">) > </span><span class="constant">0</span><span class="plain">) {</span>
|
||||
<span class="reserved">if</span><span class="plain"> (</span><span class="identifier">rule</span><span class="plain">-></span><span class="identifier">match_regexp_text</span><span class="plain">[0]) {</span>
|
||||
<span class="reserved">if</span><span class="plain"> (</span><span class="functiontext">Regexp::match</span><span class="plain">(&(</span><span class="identifier">rule</span><span class="plain">-></span><span class="element">mr</span><span class="plain">), </span><span class="identifier">matter</span><span class="plain">, </span><span class="identifier">rule</span><span class="plain">-></span><span class="element">match_regexp_text</span><span class="plain">) == </span><span class="constant">FALSE</span><span class="plain">)</span>
|
||||
<span class="reserved">return</span><span class="plain"> </span><span class="constant">FALSE</span><span class="plain">;</span>
|
||||
<span class="plain">} </span><span class="reserved">else</span><span class="plain"> </span><span class="reserved">if</span><span class="plain"> (</span><span class="functiontext">Str::len</span><span class="plain">(</span><span class="identifier">rule</span><span class="plain">-></span><span class="element">match_text</span><span class="plain">) > </span><span class="constant">0</span><span class="plain">) {</span>
|
||||
<span class="reserved">if</span><span class="plain"> ((</span><span class="identifier">rule</span><span class="plain">-></span><span class="identifier">match_prefix</span><span class="plain"> == </span><span class="constant">UNSPACED_RULE_PREFIX</span><span class="plain">) ||</span>
|
||||
<span class="plain">(</span><span class="identifier">rule</span><span class="plain">-></span><span class="element">match_prefix</span><span class="plain"> == </span><span class="constant">SPACED_RULE_PREFIX</span><span class="plain">) ||</span>
|
||||
<span class="plain">(</span><span class="identifier">rule</span><span class="plain">-></span><span class="element">match_prefix</span><span class="plain"> == </span><span class="constant">OPTIONALLY_SPACED_RULE_PREFIX</span><span class="plain">)) {</span>
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||||
<html>
|
||||
<head>
|
||||
<title>M/awwp</title>
|
||||
<title>Booklet Title</title>
|
||||
<meta name="viewport" content="width=device-width initial-scale=1">
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
|
||||
<meta http-equiv="Content-Language" content="en-gb">
|
||||
|
@ -26,7 +26,7 @@
|
|||
<!--Weave of 'M/spl' generated by 7-->
|
||||
<ul class="crumbs"><li><a href="../webs.html">Source</a></li><li><a href="index.html">inweb</a></li><li><a href="index.html#M">Manual</a></li><li><b>Supporting Programming Languages</b></li></ul><p class="purpose">How to work with a programming language not yet supported by Inweb.</p>
|
||||
|
||||
<ul class="toc"><li><a href="#SP1">§1. Introduction</a></li><li><a href="#SP4">§4. Structure of language definitions</a></li><li><a href="#SP5">§5. Properties</a></li><li><a href="#SP16">§16. Secret Features</a></li><li><a href="#SP17">§17. Keywords</a></li><li><a href="#SP18">§18. Syntax colouring program</a></li><li><a href="#SP26">§26. Example</a></li></ul><hr class="tocbar">
|
||||
<ul class="toc"><li><a href="#SP1">§1. Introduction</a></li><li><a href="#SP4">§4. Structure of language definitions</a></li><li><a href="#SP5">§5. Properties</a></li><li><a href="#SP16">§16. Secret Features</a></li><li><a href="#SP17">§17. Keywords</a></li><li><a href="#SP18">§18. Syntax colouring program</a></li><li><a href="#SP22">§22. The six splits</a></li><li><a href="#SP23">§23. The seven ways rules can apply</a></li><li><a href="#SP25">§25. The three ways rules can take effect</a></li><li><a href="#SP26">§26. The worm, Ouroboros</a></li></ul><hr class="tocbar">
|
||||
|
||||
<p class="inwebparagraph"><a id="SP1"></a><b>§1. Introduction. </b>To a very large extent, Inweb works the same way regardless of what language
|
||||
its webs are using, and that is deliberate. On the other hand, when a web
|
||||
|
@ -96,7 +96,9 @@ This section of the manual is about how to do it.
|
|||
|
||||
<p class="inwebparagraph">Once you have written a definition, use <code class="display"><span class="extract">-read-language L</span></code> at the command
|
||||
line, where <code class="display"><span class="extract">L</span></code> is the file defining it. If you have many custom languages,
|
||||
<code class="display"><span class="extract">-read-languages D</span></code> reads all of the definitions in a directory <code class="display"><span class="extract">D</span></code>.
|
||||
<code class="display"><span class="extract">-read-languages D</span></code> reads all of the definitions in a directory <code class="display"><span class="extract">D</span></code>. Or, if
|
||||
the language in question is really quite specific to a single web, you can
|
||||
make a <code class="display"><span class="extract">Private Languages</span></code> subdirectory of the web and put it in there.
|
||||
</p>
|
||||
|
||||
<p class="inwebparagraph"><a id="SP4"></a><b>§4. Structure of language definitions. </b>Each language is defined by a single ILDF file. ("Inweb Language Definition
|
||||
|
@ -111,7 +113,7 @@ so are comments, which are lines beginning with a <code class="display"><span cl
|
|||
<p class="inwebparagraph">The ILD contains three sorts of thing:
|
||||
</p>
|
||||
|
||||
<ul class="items"><li>(a) Properties, set by lines in the form <code class="display"><span class="extract">Name: C++</span></code>.
|
||||
<ul class="items"><li>(a) Properties, set by lines in the form <code class="display"><span class="extract">Name: "C++"</span></code>.
|
||||
</li><li>(b) Keywords, set by lines in the form <code class="display"><span class="extract">keyword int</span></code>.
|
||||
</li><li>(c) A colouring program, introduced by <code class="display"><span class="extract">colouring {</span></code> and continuing until the
|
||||
last block of it is closed with a <code class="display"><span class="extract">}</span></code>.
|
||||
|
@ -122,9 +124,9 @@ practice, though, every ILD should open like so:
|
|||
|
||||
|
||||
<pre class="display">
|
||||
<span class="identifier">Name</span><span class="plain">: </span><span class="identifier">C</span>
|
||||
<span class="identifier">Details</span><span class="plain">: </span><span class="identifier">The</span><span class="plain"> </span><span class="identifier">C</span><span class="plain"> </span><span class="identifier">programming</span><span class="plain"> </span><span class="identifier">language</span>
|
||||
<span class="identifier">Extension</span><span class="plain">: .</span><span class="identifier">c</span>
|
||||
<span class="plain">Name: </span><span class="string">"C"</span>
|
||||
<span class="plain">Details: </span><span class="string">"The C programming language"</span>
|
||||
<span class="plain">Extension: </span><span class="string">".c"</span>
|
||||
</pre>
|
||||
|
||||
<p class="inwebparagraph"></p>
|
||||
|
@ -133,7 +135,7 @@ practice, though, every ILD should open like so:
|
|||
with the semi-compulsory ones.
|
||||
</p>
|
||||
|
||||
<p class="inwebparagraph"><code class="display"><span class="extract">Name</span></code>. This is the one used by webs in their <code class="display"><span class="extract">Language: X</span></code> lines, and should
|
||||
<p class="inwebparagraph"><code class="display"><span class="extract">Name</span></code>. This is the one used by webs in their <code class="display"><span class="extract">Language: "X"</span></code> lines, and should
|
||||
match the ILD's own filename: wherever it is stored, the ILD for language <code class="display"><span class="extract">X</span></code>
|
||||
should be filenamed <code class="display"><span class="extract">X.ildf</span></code>.
|
||||
</p>
|
||||
|
@ -169,9 +171,9 @@ as a pair or not at all, is the notation for multiline comments.
|
|||
|
||||
|
||||
<pre class="display">
|
||||
<span class="identifier">Multiline</span><span class="plain"> </span><span class="identifier">Comment</span><span class="plain"> </span><span class="identifier">Open</span><span class="plain">: /*</span>
|
||||
<span class="identifier">Multiline</span><span class="plain"> </span><span class="identifier">Comment</span><span class="plain"> </span><span class="identifier">Close</span><span class="plain">: */</span>
|
||||
<span class="identifier">Line</span><span class="plain"> </span><span class="identifier">Comment</span><span class="plain">: //</span>
|
||||
<span class="plain">Multiline Comment Open: </span><span class="string">"/*"</span>
|
||||
<span class="plain">Multiline Comment Close: </span><span class="string">"*/"</span>
|
||||
<span class="plain">Line Comment: </span><span class="string">"//"</span>
|
||||
</pre>
|
||||
|
||||
<p class="inwebparagraph"></p>
|
||||
|
@ -196,10 +198,10 @@ character literals.
|
|||
|
||||
|
||||
<pre class="display">
|
||||
<span class="identifier">String</span><span class="plain"> </span><span class="identifier">Literal</span><span class="plain">: </span><span class="string">"\""</span>
|
||||
<span class="identifier">String</span><span class="plain"> </span><span class="identifier">Literal</span><span class="plain"> </span><span class="identifier">Escape</span><span class="plain">: \</span>
|
||||
<span class="identifier">Character</span><span class="plain"> </span><span class="identifier">Literal</span><span class="plain">: '</span>
|
||||
<span class="identifier">Character</span><span class="plain"> </span><span class="identifier">Literal</span><span class="plain"> </span><span class="identifier">Escape</span><span class="plain">: \</span>
|
||||
<span class="plain">String Literal: </span><span class="string">"\""</span>
|
||||
<span class="plain">String Literal Escape: </span><span class="string">"\\"</span>
|
||||
<span class="plain">Character Literal: </span><span class="string">"'"</span>
|
||||
<span class="plain">Character Literal Escape: </span><span class="string">"\\"</span>
|
||||
</pre>
|
||||
|
||||
<p class="inwebparagraph"></p>
|
||||
|
@ -220,9 +222,9 @@ are notations for non-decimal numbers, if they exist.
|
|||
|
||||
|
||||
<pre class="display">
|
||||
<span class="identifier">Hexadecimal</span><span class="plain"> </span><span class="identifier">Literal</span><span class="plain"> </span><span class="identifier">Prefix</span><span class="plain">: </span><span class="constant">0</span><span class="identifier">x</span>
|
||||
<span class="identifier">Binary</span><span class="plain"> </span><span class="identifier">Literal</span><span class="plain"> </span><span class="identifier">Prefix</span><span class="plain">: </span><span class="constant">0</span><span class="identifier">b</span>
|
||||
<span class="identifier">Negative</span><span class="plain"> </span><span class="identifier">Literal</span><span class="plain"> </span><span class="identifier">Prefix</span><span class="plain">: -</span>
|
||||
<span class="plain">Hexadecimal Literal Prefix: </span><span class="string">"0x"</span>
|
||||
<span class="plain">Binary Literal Prefix: </span><span class="string">"0b"</span>
|
||||
<span class="plain">Negative Literal Prefix: </span><span class="string">"-"</span>
|
||||
</pre>
|
||||
|
||||
<p class="inwebparagraph"></p>
|
||||
|
@ -235,7 +237,7 @@ For example, Perl defines:
|
|||
|
||||
|
||||
<pre class="display">
|
||||
<span class="identifier">Shebang</span><span class="plain">: #!/</span><span class="identifier">usr</span><span class="plain">/</span><span class="identifier">bin</span><span class="plain">/</span><span class="identifier">perl</span><span class="plain">\</span><span class="identifier">n</span><span class="plain">\</span><span class="identifier">n</span>
|
||||
<span class="plain">Shebang: </span><span class="string">"#!/usr/bin/perl\n\n"</span>
|
||||
</pre>
|
||||
|
||||
<p class="inwebparagraph">Most languages do not have a shebang.
|
||||
|
@ -255,7 +257,7 @@ that this language does, and gives the notation. For example, C provides:
|
|||
|
||||
|
||||
<pre class="display">
|
||||
<span class="identifier">Line</span><span class="plain"> </span><span class="identifier">Marker</span><span class="plain">: </span><span class="string">"#line %d \"%f\"\n"</span>
|
||||
<span class="plain">Line Marker: </span><span class="string">"#line %d \"%f\"\n"</span>
|
||||
</pre>
|
||||
|
||||
<p class="inwebparagraph">Here <code class="display"><span class="extract">%d</span></code> expands to the line number of origin, and <code class="display"><span class="extract">%f</span></code> to the filename of origin.
|
||||
|
@ -272,8 +274,8 @@ matter added. This material is in <code class="display"><span class="extract">Be
|
|||
|
||||
|
||||
<pre class="display">
|
||||
<span class="identifier">Before</span><span class="plain"> </span><span class="identifier">Named</span><span class="plain"> </span><span class="identifier">Paragraph</span><span class="plain"> </span><span class="identifier">Expansion</span><span class="plain">: \</span><span class="identifier">n</span><span class="reserved">{</span><span class="plain">\</span><span class="identifier">n</span>
|
||||
<span class="identifier">After</span><span class="plain"> </span><span class="identifier">Named</span><span class="plain"> </span><span class="identifier">Paragraph</span><span class="plain"> </span><span class="identifier">Expansion</span><span class="plain">: </span><span class="reserved">}</span><span class="plain">\</span><span class="identifier">n</span>
|
||||
<span class="plain">Before Named Paragraph Expansion: </span><span class="string">"\n{\n"</span>
|
||||
<span class="plain">After Named Paragraph Expansion: </span><span class="string">"}\n"</span>
|
||||
</pre>
|
||||
|
||||
<p class="inwebparagraph">The effect of this is to ensure that code such as:
|
||||
|
@ -317,8 +319,8 @@ if given, places any ending notation. For example, Inform 6 defines:
|
|||
|
||||
|
||||
<pre class="display">
|
||||
<span class="identifier">Start</span><span class="plain"> </span><span class="identifier">Definition</span><span class="plain">: </span><span class="identifier">Constant</span><span class="plain"> %</span><span class="identifier">S</span><span class="plain"> =\</span><span class="identifier">s</span>
|
||||
<span class="identifier">End</span><span class="plain"> </span><span class="identifier">Definition</span><span class="plain">: ;\</span><span class="identifier">n</span>
|
||||
<span class="plain">Start Definition: </span><span class="string">"Constant %S =\s"</span>
|
||||
<span class="plain">End Definition: </span><span class="string">";\n"</span>
|
||||
</pre>
|
||||
|
||||
<p class="inwebparagraph">where <code class="display"><span class="extract">%S</span></code> expands to the name of the term to be defined. Thus, we might tangle
|
||||
|
@ -327,7 +329,7 @@ out to:
|
|||
|
||||
|
||||
<pre class="display">
|
||||
<span class="identifier">Constant</span><span class="plain"> </span><span class="identifier">TAXICAB</span><span class="plain"> = </span><span class="constant">1729</span><span class="plain">;\</span><span class="identifier">n</span>
|
||||
<span class="plain">Constant TAXICAB = 1729;\n</span>
|
||||
</pre>
|
||||
|
||||
<p class="inwebparagraph">Inweb ignores all definitions unless one of these three properties is given.
|
||||
|
@ -342,10 +344,10 @@ and in <code class="display"><span class="extract">Start Ifndef</span></code> an
|
|||
|
||||
|
||||
<pre class="display">
|
||||
<span class="identifier">Start</span><span class="plain"> </span><span class="identifier">Ifdef</span><span class="plain">: #</span><span class="identifier">ifdef</span><span class="plain"> %</span><span class="identifier">S</span><span class="plain">;\</span><span class="identifier">n</span>
|
||||
<span class="identifier">End</span><span class="plain"> </span><span class="identifier">Ifdef</span><span class="plain">: #</span><span class="identifier">endif</span><span class="plain">; ! %</span><span class="identifier">S</span><span class="plain">\</span><span class="identifier">n</span>
|
||||
<span class="identifier">Start</span><span class="plain"> </span><span class="identifier">Ifndef</span><span class="plain">: #</span><span class="identifier">ifndef</span><span class="plain"> %</span><span class="identifier">S</span><span class="plain">;\</span><span class="identifier">n</span>
|
||||
<span class="identifier">End</span><span class="plain"> </span><span class="identifier">Ifndef</span><span class="plain">: #</span><span class="identifier">endif</span><span class="plain">; ! %</span><span class="identifier">S</span><span class="plain">\</span><span class="identifier">n</span>
|
||||
<span class="plain">Start Ifdef: </span><span class="string">"#ifdef %S;\n"</span>
|
||||
<span class="plain">End Ifdef: </span><span class="string">"#endif; ! %S\n"</span>
|
||||
<span class="plain">Start Ifndef: </span><span class="string">"#ifndef %S;\n"</span>
|
||||
<span class="plain">End Ifndef: </span><span class="string">"#endif; ! %S\n"</span>
|
||||
</pre>
|
||||
|
||||
<p class="inwebparagraph">which is a subtly different notation from the C one. Again, <code class="display"><span class="extract">%S</span></code> expands to
|
||||
|
@ -393,7 +395,7 @@ in the language in question. For C, then, we include the line:
|
|||
|
||||
|
||||
<pre class="display">
|
||||
<span class="identifier">keyword</span><span class="plain"> </span><span class="identifier">void</span>
|
||||
<span class="reserved">keyword</span><span class="plain"> </span><span class="string">void</span>
|
||||
</pre>
|
||||
|
||||
<p class="inwebparagraph">Keywords can be declared in a number of categories, which are identified by
|
||||
|
@ -403,7 +405,7 @@ for example:
|
|||
|
||||
|
||||
<pre class="display">
|
||||
<span class="identifier">keyword</span><span class="plain"> </span><span class="identifier">isdigit</span><span class="plain"> </span><span class="identifier">of</span><span class="plain"> !</span><span class="element">function</span>
|
||||
<span class="reserved">keyword</span><span class="plain"> </span><span class="string">isdigit</span><span class="plain"> </span><span class="reserved">of</span><span class="plain"> </span><span class="element">!function</span>
|
||||
</pre>
|
||||
|
||||
<p class="inwebparagraph">makes a keyword of colour <code class="display"><span class="extract">!function</span></code>.
|
||||
|
@ -420,8 +422,8 @@ palette of possibilities:
|
|||
|
||||
|
||||
<pre class="display">
|
||||
<span class="plain">!</span><span class="element">character</span><span class="plain"> !</span><span class="element">comment</span><span class="plain"> !</span><span class="element">constant</span><span class="plain"> !</span><span class="element">definition</span><span class="plain"> !</span><span class="element">element</span><span class="plain"> !</span><span class="element">extract</span>
|
||||
<span class="plain">!</span><span class="element">function</span><span class="plain"> !</span><span class="element">identifier</span><span class="plain"> !</span><span class="element">plain</span><span class="plain"> !</span><span class="element">reserved</span><span class="plain"> !</span><span class="element">string</span>
|
||||
<span class="element">!character</span><span class="plain"> </span><span class="element">!comment</span><span class="plain"> </span><span class="element">!constant</span><span class="plain"> </span><span class="element">!definition</span><span class="plain"> </span><span class="element">!element</span><span class="plain"> </span><span class="element">!extract</span>
|
||||
<span class="element">!function</span><span class="plain"> </span><span class="element">!identifier</span><span class="plain"> </span><span class="element">!plain</span><span class="plain"> </span><span class="element">!reserved</span><span class="plain"> </span><span class="element">!string</span>
|
||||
</pre>
|
||||
|
||||
<p class="inwebparagraph">Each character has its own colour. At the start of the process, every
|
||||
|
@ -454,7 +456,7 @@ empty program is legal but does nothing:
|
|||
|
||||
|
||||
<pre class="display">
|
||||
<span class="identifier">colouring</span><span class="plain"> </span><span class="reserved">{</span>
|
||||
<span class="reserved">colouring</span><span class="plain"> </span><span class="reserved">{</span>
|
||||
<span class="reserved">}</span>
|
||||
</pre>
|
||||
|
||||
|
@ -466,68 +468,225 @@ block, that's a line of source code. Blocks normally contain one or more
|
|||
|
||||
|
||||
<pre class="display">
|
||||
<span class="identifier">colouring</span><span class="plain"> </span><span class="reserved">{</span>
|
||||
<span class="identifier">marble</span><span class="plain"> </span><span class="reserved">=></span><span class="plain"> !</span><span class="element">extract</span>
|
||||
<span class="reserved">colouring</span><span class="plain"> </span><span class="reserved">{</span>
|
||||
<span class="string">marble</span><span class="plain"> </span><span class="reserved">=></span><span class="plain"> </span><span class="element">!function</span>
|
||||
<span class="reserved">}</span>
|
||||
</pre>
|
||||
|
||||
<p class="inwebparagraph">Rules take the form of "if X, then Y", and the <code class="display"><span class="extract">=></span></code> divides the X from the Y.
|
||||
This one says that if the snippet consists of the word "marble", then colour
|
||||
it <code class="display"><span class="extract">!extract</span></code>. Of course this is not very useful, since it would only catch
|
||||
it <code class="display"><span class="extract">!function</span></code>. Of course this is not very useful, since it would only catch
|
||||
lines containing only that one word. So we really want to narrow in on smaller
|
||||
snippets:
|
||||
snippets. This, for example, applies its rule to each individual character
|
||||
in turn:
|
||||
</p>
|
||||
|
||||
|
||||
<pre class="display">
|
||||
<span class="identifier">colouring</span><span class="plain"> </span><span class="reserved">{</span>
|
||||
<span class="identifier">characters</span><span class="plain"> </span><span class="reserved">{</span>
|
||||
<span class="identifier">X</span><span class="plain"> </span><span class="reserved">=></span><span class="plain"> !</span><span class="element">extract</span>
|
||||
<span class="reserved">colouring</span><span class="plain"> </span><span class="reserved">{</span>
|
||||
<span class="reserved">characters</span><span class="plain"> </span><span class="reserved">{</span>
|
||||
<span class="string">K</span><span class="plain"> </span><span class="reserved">=></span><span class="plain"> </span><span class="element">!identifier</span>
|
||||
<span class="reserved">}</span>
|
||||
<span class="reserved">}</span>
|
||||
</pre>
|
||||
|
||||
<p class="inwebparagraph">The effect of the <code class="display"><span class="extract">characters {</span></code> ... <code class="display"><span class="extract">}</span></code> block is to apply all its rules to
|
||||
each character of the snippet owning it. Inside the block, then, the snippet
|
||||
is always just a single character, and our rule tells us to paint the letter X
|
||||
wherever it occurs.
|
||||
<p class="inwebparagraph"></p>
|
||||
|
||||
<p class="inwebparagraph"><a id="SP21"></a><b>§21. </b>In the above examples, <code class="display"><span class="extract">K</span></code> and <code class="display"><span class="extract">marble</span></code> appeared without quotation marks,
|
||||
but they were only allowed to do that because (a) they were single words,
|
||||
</p>
|
||||
|
||||
<p class="inwebparagraph"><a id="SP21"></a><b>§21. </b>The block <code class="display"><span class="extract">instances of X</span></code> narrows in on each usage of the text <code class="display"><span class="extract">X</span></code> inside
|
||||
<ul class="items"><li>(b) those words had no other meaning, and (c) they didn't contain any
|
||||
awkward characters. For any more complicated texts, always use quotation
|
||||
marks. For example, in
|
||||
</li></ul>
|
||||
|
||||
<pre class="display">
|
||||
<span class="string">"=>"</span><span class="plain"> </span><span class="reserved">=></span><span class="plain"> </span><span class="element">!reserved</span>
|
||||
</pre>
|
||||
|
||||
<p class="inwebparagraph">the <code class="display"><span class="extract">=></span></code> in quotes is just text, whereas the one outside quotes is being
|
||||
used to divide a rule.
|
||||
</p>
|
||||
|
||||
<p class="inwebparagraph">If you need a literal double quote inside the double-quotes, use <code class="display"><span class="extract">\"</span></code>; and
|
||||
use <code class="display"><span class="extract">\\</span></code> for a literal backslash. For example:
|
||||
</p>
|
||||
|
||||
|
||||
<pre class="display">
|
||||
<span class="string">"\\\""</span><span class="plain"> </span><span class="reserved">=></span><span class="plain"> </span><span class="element">!reserved</span>
|
||||
</pre>
|
||||
|
||||
<p class="inwebparagraph">actually matches the text <code class="display"><span class="extract">\"</span></code>.
|
||||
</p>
|
||||
|
||||
<p class="inwebparagraph"><a id="SP22"></a><b>§22. The six splits. </b><code class="display"><span class="extract">characters</span></code> is an example of a "split", which splits up the original snippet
|
||||
of text — say, the line <code class="display"><span class="extract">let K = 2</span></code> — into smaller, non-overlapping snippets
|
||||
— in this case, nine of them: <code class="display"><span class="extract">l</span></code>, <code class="display"><span class="extract">e</span></code>, <code class="display"><span class="extract">t</span></code>, <code class="display"><span class="extract"> </span></code>, <code class="display"><span class="extract">K</span></code>, <code class="display"><span class="extract"> </span></code>, <code class="display"><span class="extract">=</span></code>, <code class="display"><span class="extract"> </span></code>, and <code class="display"><span class="extract">2</span></code>.
|
||||
Every split is followed by a block of rules, which is applied to each of the
|
||||
pieces in turn. Inweb works sideways-first: thus, if the block contains rules
|
||||
R1, R2, ..., then R1 is applied to each piece first, then R2 to each piece,
|
||||
and so on.
|
||||
</p>
|
||||
|
||||
<p class="inwebparagraph">There are several different ways to split, all of them written in the
|
||||
plural, to emphasize that they work on what are usually multiple things.
|
||||
Rules, on the other hand, are written in the singular. Splits are not allowed
|
||||
to be followed by <code class="display"><span class="extract">=></span></code>: they always begin a block.
|
||||
</p>
|
||||
|
||||
<p class="inwebparagraph">1. <code class="display"><span class="extract">characters</span></code> splits the snippet into each of its characters.
|
||||
</p>
|
||||
|
||||
<p class="inwebparagraph">2. <code class="display"><span class="extract">characters in T</span></code> splits the snippet into each of its characters which
|
||||
lie inside the text <code class="display"><span class="extract">T</span></code>. For example, here is a not very useful ILD for
|
||||
plain text in which all vowels are in red:
|
||||
</p>
|
||||
|
||||
<pre class="display">
|
||||
<span class="plain">Name: </span><span class="string">"VowelsExample"</span>
|
||||
<span class="reserved">colouring</span><span class="plain"> </span><span class="reserved">{</span>
|
||||
<span class="plain"> </span><span class="reserved">=></span><span class="plain"> </span><span class="element">!plain</span>
|
||||
<span class="plain"> </span><span class="reserved">characters</span><span class="plain"> </span><span class="reserved">in</span><span class="plain"> </span><span class="string">"AEIOUaeiou"</span><span class="plain"> </span><span class="reserved">{</span>
|
||||
<span class="plain"> </span><span class="reserved">=></span><span class="plain"> </span><span class="element">!function</span>
|
||||
<span class="plain"> </span><span class="reserved">}</span>
|
||||
<span class="reserved">}</span>
|
||||
|
||||
</pre>
|
||||
|
||||
<p class="inwebparagraph">Given the text:
|
||||
</p>
|
||||
|
||||
|
||||
<pre class="display">
|
||||
<span class="plain">A noir, E blanc, I rouge, U vert, O bleu : voyelles,</span>
|
||||
<span class="plain">Je dirai quelque jour vos naissances latentes :</span>
|
||||
<span class="plain">A, noir corset velu des mouches éclatantes</span>
|
||||
<span class="plain">Qui bombinent autour des puanteurs cruelles,</span>
|
||||
</pre>
|
||||
|
||||
<p class="inwebparagraph">this produces:
|
||||
</p>
|
||||
|
||||
|
||||
<pre class="display">
|
||||
<span class="functiontext">A</span><span class="plain"> n</span><span class="functiontext">oi</span><span class="plain">r, </span><span class="functiontext">E</span><span class="plain"> bl</span><span class="functiontext">a</span><span class="plain">nc, </span><span class="functiontext">I</span><span class="plain"> r</span><span class="functiontext">ou</span><span class="plain">g</span><span class="functiontext">e</span><span class="plain">, </span><span class="functiontext">U</span><span class="plain"> v</span><span class="functiontext">e</span><span class="plain">rt, </span><span class="functiontext">O</span><span class="plain"> bl</span><span class="functiontext">eu</span><span class="plain"> : v</span><span class="functiontext">o</span><span class="plain">y</span><span class="functiontext">e</span><span class="plain">ll</span><span class="functiontext">e</span><span class="plain">s,</span>
|
||||
<span class="plain">J</span><span class="functiontext">e</span><span class="plain"> d</span><span class="functiontext">i</span><span class="plain">r</span><span class="functiontext">ai</span><span class="plain"> q</span><span class="functiontext">ue</span><span class="plain">lq</span><span class="functiontext">ue</span><span class="plain"> j</span><span class="functiontext">ou</span><span class="plain">r v</span><span class="functiontext">o</span><span class="plain">s n</span><span class="functiontext">ai</span><span class="plain">ss</span><span class="functiontext">a</span><span class="plain">nc</span><span class="functiontext">e</span><span class="plain">s l</span><span class="functiontext">a</span><span class="plain">t</span><span class="functiontext">e</span><span class="plain">nt</span><span class="functiontext">e</span><span class="plain">s :</span>
|
||||
<span class="functiontext">A</span><span class="plain">, n</span><span class="functiontext">oi</span><span class="plain">r c</span><span class="functiontext">o</span><span class="plain">rs</span><span class="functiontext">e</span><span class="plain">t v</span><span class="functiontext">e</span><span class="plain">l</span><span class="functiontext">u</span><span class="plain"> d</span><span class="functiontext">e</span><span class="plain">s m</span><span class="functiontext">ou</span><span class="plain">ch</span><span class="functiontext">e</span><span class="plain">s écl</span><span class="functiontext">a</span><span class="plain">t</span><span class="functiontext">a</span><span class="plain">nt</span><span class="functiontext">e</span><span class="plain">s</span>
|
||||
<span class="plain">Q</span><span class="functiontext">ui</span><span class="plain"> b</span><span class="functiontext">o</span><span class="plain">mb</span><span class="functiontext">i</span><span class="plain">n</span><span class="functiontext">e</span><span class="plain">nt </span><span class="functiontext">au</span><span class="plain">t</span><span class="functiontext">ou</span><span class="plain">r d</span><span class="functiontext">e</span><span class="plain">s p</span><span class="functiontext">ua</span><span class="plain">nt</span><span class="functiontext">eu</span><span class="plain">rs cr</span><span class="functiontext">ue</span><span class="plain">ll</span><span class="functiontext">e</span><span class="plain">s,</span>
|
||||
</pre>
|
||||
|
||||
<p class="inwebparagraph">3. The split <code class="display"><span class="extract">instances of X</span></code> narrows in on each usage of the text <code class="display"><span class="extract">X</span></code> inside
|
||||
the snippet. For example,
|
||||
</p>
|
||||
|
||||
|
||||
<pre class="display">
|
||||
<span class="identifier">colouring</span><span class="plain"> </span><span class="reserved">{</span>
|
||||
<span class="identifier">instances</span><span class="plain"> </span><span class="identifier">of</span><span class="plain"> == </span><span class="reserved">{</span>
|
||||
<span class="reserved">=></span><span class="plain"> !</span><span class="element">reserved</span>
|
||||
<span class="reserved">}</span>
|
||||
<span class="reserved">}</span>
|
||||
<span class="plain">Name: </span><span class="string">"LineageExample"</span>
|
||||
<span class="reserved">colouring</span><span class="plain"> </span><span class="reserved">{</span>
|
||||
<span class="plain"> </span><span class="reserved">=></span><span class="plain"> </span><span class="element">!plain</span>
|
||||
<span class="plain"> </span><span class="reserved">instances</span><span class="plain"> </span><span class="reserved">of</span><span class="plain"> </span><span class="string">"son"</span><span class="plain"> </span><span class="reserved">{</span>
|
||||
<span class="plain"> </span><span class="reserved">=></span><span class="plain"> </span><span class="element">!function</span>
|
||||
<span class="plain"> </span><span class="reserved">}</span>
|
||||
<span class="reserved">}</span>
|
||||
|
||||
</pre>
|
||||
|
||||
<p class="inwebparagraph">gives every usage of <code class="display"><span class="extract">==</span></code> the colour <code class="display"><span class="extract">!reserved</span></code>. Note that it never runs in
|
||||
an overlapping way: the snippet <code class="display"><span class="extract">===</span></code> would be considered as having only one
|
||||
instance of <code class="display"><span class="extract">==</span></code> (the first two characters), while <code class="display"><span class="extract">====</span></code> would have two.
|
||||
</p>
|
||||
|
||||
<p class="inwebparagraph"><a id="SP22"></a><b>§22. </b>Another kind of block is <code class="display"><span class="extract">runs of C</span></code>, where <code class="display"><span class="extract">C</span></code> is a colour. For example:
|
||||
<p class="inwebparagraph">acts on the text:
|
||||
</p>
|
||||
|
||||
|
||||
<pre class="display">
|
||||
<span class="identifier">colouring</span><span class="plain"> </span><span class="reserved">{</span>
|
||||
<span class="identifier">runs</span><span class="plain"> </span><span class="identifier">of</span><span class="plain"> !</span><span class="element">identifier</span><span class="plain"> </span><span class="reserved">{</span>
|
||||
<span class="identifier">printf</span><span class="plain"> </span><span class="reserved">=></span><span class="plain"> !</span><span class="element">function</span>
|
||||
<span class="identifier">sscanf</span><span class="plain"> </span><span class="reserved">=></span><span class="plain"> !</span><span class="element">function</span>
|
||||
<span class="reserved">}</span>
|
||||
<span class="reserved">}</span>
|
||||
<span class="plain">Jacob first appears in the Book of Genesis, the son of Isaac and Rebecca, the</span>
|
||||
<span class="plain">grandson of Abraham, Sarah and Bethuel, the nephew of Ishmael.</span>
|
||||
</pre>
|
||||
|
||||
<p class="inwebparagraph">If this runs on the line <code class="display"><span class="extract">if (x == 1) printf("Hello!");</span></code>, then the inner
|
||||
block will run three times: its snippet will be <code class="display"><span class="extract">if</span></code>, then <code class="display"><span class="extract">x</span></code>, then <code class="display"><span class="extract">printf</span></code>.
|
||||
The rules inside the block will take effect only on the third time, when it
|
||||
will paint the word <code class="display"><span class="extract">printf</span></code> in <code class="display"><span class="extract">!function</span></code> colour.
|
||||
<p class="inwebparagraph">to produce:
|
||||
</p>
|
||||
|
||||
|
||||
<pre class="display">
|
||||
<span class="plain">Jacob first appears in the Book of Genesis, the </span><span class="functiontext">son</span><span class="plain"> of Isaac and Rebecca, the</span>
|
||||
<span class="plain">grand</span><span class="functiontext">son</span><span class="plain"> of Abraham, Sarah and Bethuel, the nephew of Ishmael.</span>
|
||||
</pre>
|
||||
|
||||
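<p class="inwebparagraph">Note that it never runs in an overlapping way: if we were instead looking for instances of <code class="display"><span class="extract">==</span></code>, the snippet <code class="display"><span class="extract">===</span></code> would be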
|
||||
considered as having only one instance of <code class="display"><span class="extract">==</span></code> (the first two characters),
|
||||
while <code class="display"><span class="extract">====</span></code> would have two.
|
||||
</p>
|
||||
|
||||
<p class="inwebparagraph">4. The split <code class="display"><span class="extract">runs of C</span></code>, where <code class="display"><span class="extract">C</span></code> describes a colour, splits the snippet
|
||||
into non-overlapping contiguous pieces which have that colour. For example:
|
||||
</p>
|
||||
|
||||
<pre class="display">
|
||||
<span class="plain">Name: </span><span class="string">"RunningExample"</span>
|
||||
<span class="reserved">colouring</span><span class="plain"> </span><span class="reserved">{</span>
|
||||
<span class="plain"> </span><span class="reserved">=></span><span class="plain"> </span><span class="element">!plain</span>
|
||||
<span class="plain"> </span><span class="reserved">characters</span><span class="plain"> </span><span class="reserved">in</span><span class="plain"> </span><span class="string">"0123456789"</span><span class="plain"> </span><span class="reserved">{</span>
|
||||
<span class="plain"> </span><span class="reserved">=></span><span class="plain"> </span><span class="element">!function</span>
|
||||
<span class="plain"> </span><span class="reserved">}</span>
|
||||
<span class="plain"> </span><span class="reserved">runs</span><span class="plain"> </span><span class="reserved">of</span><span class="plain"> </span><span class="element">!plain</span><span class="plain"> </span><span class="reserved">{</span>
|
||||
<span class="plain"> </span><span class="string">"-"</span><span class="plain"> </span><span class="reserved">=></span><span class="plain"> </span><span class="element">!function</span>
|
||||
<span class="plain"> </span><span class="reserved">}</span>
|
||||
<span class="reserved">}</span>
|
||||
|
||||
</pre>
|
||||
|
||||
<p class="inwebparagraph">acts on:
|
||||
</p>
|
||||
|
||||
|
||||
<pre class="display">
|
||||
<span class="plain">Napoleon Bonaparte (1769-1821) took 167 scientists to Egypt in 1798,</span>
|
||||
<span class="plain">who published their so-called Memoirs over the period 1798-1801.</span>
|
||||
</pre>
|
||||
|
||||
<p class="inwebparagraph">to produce:
|
||||
</p>
|
||||
|
||||
|
||||
<pre class="display">
|
||||
<span class="plain">Napoleon Bonaparte (</span><span class="functiontext">1769-1821</span><span class="plain">) took </span><span class="functiontext">167</span><span class="plain"> scientists to Egypt in </span><span class="functiontext">1798</span><span class="plain">,</span>
|
||||
<span class="plain">who published their so-called Memoirs over the period </span><span class="functiontext">1798-1801</span><span class="plain">.</span>
|
||||
</pre>
|
||||
|
||||
<p class="inwebparagraph">Here the hyphens in number ranges have been coloured, but not the hyphen
|
||||
in "so-called".
|
||||
</p>
|
||||
|
||||
<p class="inwebparagraph">A more computer-science sort of example would be:
|
||||
</p>
|
||||
|
||||
<pre class="display">
|
||||
<span class="plain">Name: </span><span class="string">"StdioExample"</span>
|
||||
<span class="reserved">colouring</span><span class="plain"> </span><span class="reserved">{</span>
|
||||
<span class="plain"> </span><span class="reserved">runs</span><span class="plain"> </span><span class="reserved">of</span><span class="plain"> </span><span class="element">!identifier</span><span class="plain"> </span><span class="reserved">{</span>
|
||||
<span class="plain"> </span><span class="string">printf</span><span class="plain"> </span><span class="reserved">=></span><span class="plain"> </span><span class="element">!function</span>
|
||||
<span class="plain"> </span><span class="string">sscanf</span><span class="plain"> </span><span class="reserved">=></span><span class="plain"> </span><span class="element">!function</span>
|
||||
<span class="plain"> </span><span class="reserved">}</span>
|
||||
<span class="reserved">}</span>
|
||||
|
||||
</pre>
|
||||
|
||||
<p class="inwebparagraph">which acts on:
|
||||
</p>
|
||||
|
||||
|
||||
<pre class="display">
|
||||
<span class="plain">if (x == 1) printf("Hello!");</span>
|
||||
</pre>
|
||||
|
||||
<p class="inwebparagraph">to produce:
|
||||
</p>
|
||||
|
||||
|
||||
<pre class="display">
|
||||
<span class="identifier">if</span><span class="plain"> (</span><span class="identifier">x</span><span class="plain"> == </span><span class="constant">1</span><span class="plain">) </span><span class="functiontext">printf</span><span class="plain">("</span><span class="identifier">Hello</span><span class="plain">!");</span>
|
||||
</pre>
|
||||
|
||||
<p class="inwebparagraph">The split divides the line up into four runs, and the inner block runs four
|
||||
times: on <code class="display"><span class="extract">if</span></code>, then <code class="display"><span class="extract">x</span></code>, then <code class="display"><span class="extract">printf</span></code>, then <code class="display"><span class="extract">Hello</span></code>. Only the third time has any effect.
|
||||
</p>
|
||||
|
||||
<p class="inwebparagraph">As a special form, <code class="display"><span class="extract">runs of unquoted</span></code> means "runs of characters not painted
|
||||
|
@ -535,30 +694,124 @@ either with <code class="display"><span class="extract">!string</span></code> or
|
|||
not a colour.
|
||||
</p>
|
||||
|
||||
<p class="inwebparagraph"><a id="SP23"></a><b>§23. </b>It remains to specify what rules can do. As noted, they take the form
|
||||
"if X, then Y". The following are the possibilities for X, the condition:
|
||||
<p class="inwebparagraph">5. The split <code class="display"><span class="extract">matches of /E/</span></code>, where <code class="display"><span class="extract">/E/</span></code> is a regular expression (see below),
|
||||
splits the snippet up into non-overlapping pieces which match it: possibly
|
||||
none at all, of course, in which case the block of rules is never used.
|
||||
This is easier to demonstrate than explain:
|
||||
</p>
|
||||
|
||||
<p class="inwebparagraph">1. X can be omitted altogether, and then the rule always applies. For example,
|
||||
this somewhat nihilistic program gets rid of colouring entirely:
|
||||
<pre class="display">
|
||||
<span class="plain">Name: </span><span class="string">"AssemblageExample"</span>
|
||||
<span class="reserved">colouring</span><span class="plain"> </span><span class="reserved">{</span>
|
||||
<span class="plain"> </span><span class="reserved">matches</span><span class="plain"> </span><span class="reserved">of</span><span class="plain"> </span><span class="functiontext">/\.[A-Za-z_][A-Za-z_0-9]*/</span><span class="plain"> </span><span class="reserved">{</span>
|
||||
<span class="plain"> </span><span class="reserved">=></span><span class="plain"> </span><span class="element">!function</span>
|
||||
<span class="plain"> </span><span class="reserved">}</span>
|
||||
<span class="reserved">}</span>
|
||||
|
||||
</pre>
|
||||
|
||||
<p class="inwebparagraph">which acts on:
|
||||
</p>
|
||||
|
||||
|
||||
<pre class="display">
|
||||
<span class="identifier">colouring</span><span class="plain"> </span><span class="reserved">{</span>
|
||||
<span class="reserved">=></span><span class="plain"> !</span><span class="element">plain</span>
|
||||
<span class="plain">JSR .initialise</span>
|
||||
<span class="plain">LDR A, #.data</span>
|
||||
<span class="plain">RTS</span>
|
||||
<span class="plain">.initialise</span>
|
||||
<span class="plain">TAX</span>
|
||||
</pre>
|
||||
|
||||
<p class="inwebparagraph">to produce:
|
||||
</p>
|
||||
|
||||
|
||||
<pre class="display">
|
||||
<span class="identifier">JSR</span><span class="plain"> </span><span class="functiontext">.initialise</span>
|
||||
<span class="identifier">LDR</span><span class="plain"> </span><span class="identifier">A</span><span class="plain">, #</span><span class="functiontext">.data</span>
|
||||
<span class="identifier">RTS</span>
|
||||
<span class="functiontext">.initialise</span>
|
||||
<span class="identifier">TAX</span>
|
||||
</pre>
|
||||
|
||||
<p class="inwebparagraph">6. Lastly, the split <code class="display"><span class="extract">brackets in /E/</span></code> matches the snippet against the
|
||||
regular expression <code class="display"><span class="extract">E</span></code>, and then runs the rules on each bracketed
|
||||
subexpression in turn. (If there is no match, or there are no bracketed
|
||||
terms in <code class="display"><span class="extract">E</span></code>, nothing happens.)
|
||||
</p>
|
||||
|
||||
<pre class="display">
|
||||
<span class="plain">Name: </span><span class="string">"EquationsExample"</span>
|
||||
<span class="reserved">colouring</span><span class="plain"> </span><span class="reserved">{</span>
|
||||
<span class="plain"> </span><span class="reserved">=></span><span class="plain"> </span><span class="element">!plain</span>
|
||||
<span class="plain"> </span><span class="reserved">brackets</span><span class="plain"> </span><span class="reserved">in</span><span class="plain"> </span><span class="functiontext">/.*?([A-Z])\s*=\s*(\d+).*/</span><span class="plain"> </span><span class="reserved">{</span>
|
||||
<span class="plain"> </span><span class="reserved">=></span><span class="plain"> </span><span class="element">!function</span>
|
||||
<span class="plain"> </span><span class="reserved">}</span>
|
||||
<span class="reserved">}</span>
|
||||
|
||||
</pre>
|
||||
|
||||
<p class="inwebparagraph">which acts on:
|
||||
</p>
|
||||
|
||||
|
||||
<pre class="display">
|
||||
<span class="plain">A = 2716</span>
|
||||
<span class="plain">B=3</span>
|
||||
<span class="plain">C =715 + B</span>
|
||||
<span class="plain">D < 14</span>
|
||||
</pre>
|
||||
|
||||
<p class="inwebparagraph">to produce:
|
||||
</p>
|
||||
|
||||
|
||||
<pre class="display">
|
||||
<span class="functiontext">A</span><span class="plain"> = </span><span class="functiontext">2716</span>
|
||||
<span class="functiontext">B</span><span class="plain">=</span><span class="functiontext">3</span>
|
||||
<span class="functiontext">C</span><span class="plain"> =</span><span class="functiontext">715</span><span class="plain"> + B</span>
|
||||
<span class="plain">D < 14</span>
|
||||
</pre>
|
||||
|
||||
<p class="inwebparagraph">What happens here is that the expression has two bracketed terms, one for
|
||||
the letter, one for the number; the rule is run first on the letter, then
|
||||
on the number, and both are turned to <code class="display"><span class="extract">!function</span></code>.
|
||||
</p>
|
||||
|
||||
<p class="inwebparagraph"><a id="SP23"></a><b>§23. The seven ways rules can apply. </b>Rules are the lines containing a <code class="display"><span class="extract">=></span></code>. As noted, they take the form "if X, then
|
||||
Y". The following are the possibilities for X, the condition.
|
||||
</p>
|
||||
|
||||
<p class="inwebparagraph">1. The easiest thing is to give nothing at all, and then the rule always
|
||||
applies. For example, this somewhat nihilistic program gets rid of colouring
|
||||
entirely:
|
||||
</p>
|
||||
|
||||
|
||||
<pre class="display">
|
||||
<span class="reserved">colouring</span><span class="plain"> </span><span class="reserved">{</span>
|
||||
<span class="reserved">=></span><span class="plain"> </span><span class="element">!plain</span>
|
||||
<span class="reserved">}</span>
|
||||
</pre>
|
||||
|
||||
<p class="inwebparagraph">2. X can require the whole snippet to be of a particular colour, by writing
|
||||
<code class="display"><span class="extract">colour C</span></code>. For example:
|
||||
<p class="inwebparagraph">2. If X is a piece of literal text, the rule applies when the snippet is
|
||||
exactly that text. For example,
|
||||
</p>
|
||||
|
||||
|
||||
<pre class="display">
|
||||
<span class="identifier">colouring</span><span class="plain"> </span><span class="reserved">{</span>
|
||||
<span class="identifier">characters</span><span class="plain"> </span><span class="reserved">{</span>
|
||||
<span class="identifier">colour</span><span class="plain"> !</span><span class="element">character</span><span class="plain"> </span><span class="reserved">=></span><span class="plain"> !</span><span class="element">plain</span>
|
||||
<span class="string">printf</span><span class="plain"> </span><span class="reserved">=></span><span class="plain"> </span><span class="element">!function</span>
|
||||
</pre>
|
||||
|
||||
<p class="inwebparagraph">3. X can require the whole snippet to be of a particular colour, by writing
|
||||
<code class="display"><span class="extract">coloured C</span></code>. For example:
|
||||
</p>
|
||||
|
||||
|
||||
<pre class="display">
|
||||
<span class="reserved">colouring</span><span class="plain"> </span><span class="reserved">{</span>
|
||||
<span class="reserved">characters</span><span class="plain"> </span><span class="reserved">{</span>
|
||||
<span class="reserved">coloured</span><span class="plain"> </span><span class="element">!character</span><span class="plain"> </span><span class="reserved">=></span><span class="plain"> </span><span class="element">!plain</span>
|
||||
<span class="reserved">}</span>
|
||||
<span class="reserved">}</span>
|
||||
</pre>
|
||||
|
@ -566,21 +819,23 @@ this somewhat nihilistic program gets rid of colouring entirely:
|
|||
<p class="inwebparagraph">removes the syntax colouring on character literals.
|
||||
</p>
|
||||
|
||||
<p class="inwebparagraph">3. X can require the snippet to be one of the language's known keywords, as
|
||||
<p class="inwebparagraph">4. X can require the snippet to be one of the language's known keywords, as
|
||||
declared earlier in the ILD by a <code class="display"><span class="extract">keyword</span></code> command. The syntax here is
|
||||
<code class="display"><span class="extract">keyword of C</span></code>, where <code class="display"><span class="extract">C</span></code> is a colour. For example:
|
||||
</p>
|
||||
|
||||
|
||||
<pre class="display">
|
||||
<span class="identifier">keyword</span><span class="plain"> </span><span class="identifier">of</span><span class="plain"> !</span><span class="element">element</span><span class="plain"> </span><span class="reserved">=></span><span class="plain"> !</span><span class="element">element</span>
|
||||
<span class="reserved">keyword</span><span class="plain"> </span><span class="reserved">of</span><span class="plain"> </span><span class="element">!element</span><span class="plain"> </span><span class="reserved">=></span><span class="plain"> </span><span class="element">!element</span>
|
||||
</pre>
|
||||
|
||||
<p class="inwebparagraph">says: if the snippet is a keyword declared as being of colour <code class="display"><span class="extract">!element</span></code>,
|
||||
then actually colour it that way.
|
||||
then actually colour it that way. (This is much faster than making many
|
||||
comparison rules in a row, one for each keyword in the language; Inweb has
|
||||
put all of the registered keywords into a hash table for rapid lookup.)
|
||||
</p>
|
||||
|
||||
<p class="inwebparagraph">4. X can look at a little context before or after the snippet, testing it
|
||||
<p class="inwebparagraph">5. X can look at a little context before or after the snippet, testing it
|
||||
with one of the following: <code class="display"><span class="extract">prefix P</span></code>, <code class="display"><span class="extract">spaced prefix P</span></code>,
|
||||
<code class="display"><span class="extract">optionally spaced prefix P</span></code>. These qualifiers have to do with whether white
|
||||
space must appear after <code class="display"><span class="extract">P</span></code> and before the snippet. For example,
|
||||
|
@ -588,8 +843,8 @@ space must appear after <code class="display"><span class="extract">P</span></co
|
|||
|
||||
|
||||
<pre class="display">
|
||||
<span class="identifier">runs</span><span class="plain"> </span><span class="identifier">of</span><span class="plain"> !</span><span class="element">identifier</span><span class="plain"> </span><span class="reserved">{</span>
|
||||
<span class="identifier">prefix</span><span class="plain"> </span><span class="identifier">optionally</span><span class="plain"> </span><span class="identifier">spaced</span><span class="plain"> -> </span><span class="reserved">=></span><span class="plain"> !</span><span class="element">element</span>
|
||||
<span class="reserved">runs</span><span class="plain"> </span><span class="reserved">of</span><span class="plain"> </span><span class="element">!identifier</span><span class="plain"> </span><span class="reserved">{</span>
|
||||
<span class="reserved">optionally</span><span class="plain"> </span><span class="reserved">spaced</span><span class="plain"> </span><span class="reserved">prefix</span><span class="plain"> -> </span><span class="reserved">=></span><span class="plain"> </span><span class="element">!element</span>
|
||||
<span class="reserved">}</span>
|
||||
</pre>
|
||||
|
||||
|
@ -597,18 +852,103 @@ space must appear after <code class="display"><span class="extract">P</span></co
|
|||
as <code class="display"><span class="extract">!element</span></code>. Similarly for <code class="display"><span class="extract">suffix</span></code>.
|
||||
</p>
|
||||
|
||||
<p class="inwebparagraph">5. And otherwise X is literal text, and the rule applies if and only if
|
||||
the snippet is exactly that text. For example,
|
||||
<p class="inwebparagraph">6. X can test the snippet against a regular expression, with <code class="display"><span class="extract">matching /E/</span></code>.
|
||||
For example:
|
||||
</p>
|
||||
|
||||
|
||||
<pre class="display">
|
||||
<span class="identifier">printf</span><span class="plain"> </span><span class="reserved">=></span><span class="plain"> !</span><span class="element">function</span>
|
||||
<span class="reserved">runs</span><span class="plain"> </span><span class="reserved">of</span><span class="plain"> </span><span class="element">!identifier</span><span class="plain"> </span><span class="reserved">{</span>
|
||||
<span class="reserved">matching</span><span class="plain"> </span><span class="functiontext">/.*x.*/</span><span class="plain"> </span><span class="reserved">=></span><span class="plain"> </span><span class="element">!element</span>
|
||||
<span class="reserved">}</span>
|
||||
</pre>
|
||||
|
||||
<p class="inwebparagraph">...turns any identifier containing a lower-case <code class="display"><span class="extract">x</span></code> into <code class="display"><span class="extract">!element</span></code> colour.
|
||||
Note that <code class="display"><span class="extract">matching /x/</span></code> would not have worked, because our regular expression
|
||||
is required to match the entire snippet, not just somewhere inside.
|
||||
</p>
|
||||
|
||||
|
||||
<pre class="display">
|
||||
<span class="reserved">characters</span><span class="plain"> </span><span class="reserved">in</span><span class="plain"> </span><span class="string">"0123456789"</span><span class="plain"> </span><span class="reserved">{</span>
|
||||
<span class="reserved">matching</span><span class="plain"> </span><span class="functiontext">/\d\d\d\d/</span><span class="plain"> </span><span class="reserved">=></span><span class="plain"> </span><span class="element">!element</span>
|
||||
<span class="reserved">}</span>
|
||||
</pre>
|
||||
|
||||
<p class="inwebparagraph">...colours all four-digit numbers, but no others.
|
||||
</p>
|
||||
|
||||
<p class="inwebparagraph">7. Whenever a split takes place, Inweb keeps count of how many pieces there are,
|
||||
and different rules can apply to differently numbered pieces. The notation
|
||||
is <code class="display"><span class="extract">number N</span></code>, where <code class="display"><span class="extract">N</span></code> is the number, counting from 1. For example,
|
||||
</p>
|
||||
|
||||
<pre class="display">
|
||||
<span class="plain">Name: </span><span class="string">"ThirdExample"</span>
|
||||
<span class="reserved">colouring</span><span class="plain"> </span><span class="reserved">{</span>
|
||||
<span class="plain"> </span><span class="reserved">=></span><span class="plain"> </span><span class="element">!plain</span>
|
||||
<span class="plain"> </span><span class="reserved">matches</span><span class="plain"> </span><span class="reserved">of</span><span class="plain"> </span><span class="functiontext">/\S+/</span><span class="plain"> </span><span class="reserved">{</span>
|
||||
<span class="plain"> </span><span class="string">number</span><span class="plain"> </span><span class="string">3</span><span class="plain"> </span><span class="reserved">=></span><span class="plain"> </span><span class="element">!function</span>
|
||||
<span class="plain"> </span><span class="reserved">}</span>
|
||||
<span class="reserved">}</span>
|
||||
|
||||
</pre>
|
||||
|
||||
<p class="inwebparagraph">which acts on:
|
||||
</p>
|
||||
|
||||
|
||||
<pre class="display">
|
||||
<span class="plain">With how sad steps, O Moon, thou climb'st the skies!</span>
|
||||
<span class="plain">How silently, and with how wan a face!</span>
|
||||
<span class="plain">What, may it be that even in heav'nly place</span>
|
||||
<span class="plain">That busy archer his sharp arrows tries!</span>
|
||||
<span class="plain">Sure, if that long-with love-acquainted eyes</span>
|
||||
<span class="plain">Can judge of love, thou feel'st a lover's case,</span>
|
||||
<span class="plain">I read it in thy looks; thy languish'd grace</span>
|
||||
<span class="plain">To me, that feel the like, thy state descries.</span>
|
||||
<span class="plain">Then, ev'n of fellowship, O Moon, tell me,</span>
|
||||
<span class="plain">Is constant love deem'd there but want of wit?</span>
|
||||
<span class="plain">Are beauties there as proud as here they be?</span>
|
||||
<span class="plain">Do they above love to be lov'd, and yet</span>
|
||||
<span class="plain">Those lovers scorn whom that love doth possess?</span>
|
||||
<span class="plain">Do they call virtue there ungratefulness?</span>
|
||||
</pre>
|
||||
|
||||
<p class="inwebparagraph">to produce:
|
||||
</p>
|
||||
|
||||
|
||||
<pre class="display">
|
||||
<span class="plain">With how </span><span class="functiontext">sad</span><span class="plain"> steps, O Moon, thou climb'st the skies!</span>
|
||||
<span class="plain">How silently, </span><span class="functiontext">and</span><span class="plain"> with how wan a face!</span>
|
||||
<span class="plain">What, may </span><span class="functiontext">it</span><span class="plain"> be that even in heav'nly place</span>
|
||||
<span class="plain">That busy </span><span class="functiontext">archer</span><span class="plain"> his sharp arrows tries!</span>
|
||||
<span class="plain">Sure, if </span><span class="functiontext">that</span><span class="plain"> long-with love-acquainted eyes</span>
|
||||
<span class="plain">Can judge </span><span class="functiontext">of</span><span class="plain"> love, thou feel'st a lover's case,</span>
|
||||
<span class="plain">I read </span><span class="functiontext">it</span><span class="plain"> in thy looks; thy languish'd grace</span>
|
||||
<span class="plain">To me, </span><span class="functiontext">that</span><span class="plain"> feel the like, thy state descries.</span>
|
||||
<span class="plain">Then, ev'n </span><span class="functiontext">of</span><span class="plain"> fellowship, O Moon, tell me,</span>
|
||||
<span class="plain">Is constant </span><span class="functiontext">love</span><span class="plain"> deem'd there but want of wit?</span>
|
||||
<span class="plain">Are beauties </span><span class="functiontext">there</span><span class="plain"> as proud as here they be?</span>
|
||||
<span class="plain">Do they </span><span class="functiontext">above</span><span class="plain"> love to be lov'd, and yet</span>
|
||||
<span class="plain">Those lovers </span><span class="functiontext">scorn</span><span class="plain"> whom that love doth possess?</span>
|
||||
<span class="plain">Do they </span><span class="functiontext">call</span><span class="plain"> virtue there ungratefulness?</span>
|
||||
</pre>
|
||||
|
||||
<p class="inwebparagraph"></p>
|
||||
|
||||
<p class="inwebparagraph"><a id="SP24"></a><b>§24. </b>Now let's look at the conclusion Y of a rule. Here the possibilities are
|
||||
<p class="inwebparagraph"><a id="SP24"></a><b>§24. </b>Any condition can be reversed by preceding it with <code class="display"><span class="extract">not</span></code>. For example,
|
||||
</p>
|
||||
|
||||
|
||||
<pre class="display">
|
||||
<span class="reserved">not</span><span class="plain"> </span><span class="reserved">coloured</span><span class="plain"> </span><span class="element">!string</span><span class="plain"> </span><span class="reserved">=></span><span class="plain"> </span><span class="element">!plain</span>
|
||||
</pre>
|
||||
|
||||
<p class="inwebparagraph"></p>
|
||||
|
||||
<p class="inwebparagraph"><a id="SP25"></a><b>§25. The three ways rules can take effect. </b>Now let's look at the conclusion Y of a rule. Here the possibilities are
|
||||
simpler:
|
||||
</p>
|
||||
|
||||
|
@ -621,9 +961,9 @@ applied to the snippet only if this rule has matched. For example,
|
|||
|
||||
|
||||
<pre class="display">
|
||||
<span class="identifier">keyword</span><span class="plain"> !</span><span class="element">element</span><span class="plain"> </span><span class="reserved">=></span><span class="plain"> </span><span class="reserved">{</span>
|
||||
<span class="identifier">optionally</span><span class="plain"> </span><span class="identifier">spaced</span><span class="plain"> </span><span class="identifier">prefix</span><span class="plain"> . </span><span class="reserved">=></span><span class="plain"> !</span><span class="element">element</span>
|
||||
<span class="identifier">optionally</span><span class="plain"> </span><span class="identifier">spaced</span><span class="plain"> </span><span class="identifier">prefix</span><span class="plain"> -> </span><span class="reserved">=></span><span class="plain"> !</span><span class="element">element</span>
|
||||
<span class="reserved">keyword</span><span class="plain"> </span><span class="element">!element</span><span class="plain"> </span><span class="reserved">=></span><span class="plain"> </span><span class="reserved">{</span>
|
||||
<span class="reserved">optionally</span><span class="plain"> </span><span class="reserved">spaced</span><span class="plain"> </span><span class="reserved">prefix</span><span class="plain"> . </span><span class="reserved">=></span><span class="plain"> </span><span class="element">!element</span>
|
||||
<span class="reserved">optionally</span><span class="plain"> </span><span class="reserved">spaced</span><span class="plain"> </span><span class="reserved">prefix</span><span class="plain"> -> </span><span class="reserved">=></span><span class="plain"> </span><span class="element">!element</span>
|
||||
<span class="reserved">}</span>
|
||||
</pre>
|
||||
|
||||
|
@ -637,55 +977,92 @@ the notation <code class="display"><span class="extract">=> C on both</span><
|
|||
</p>
|
||||
|
||||
<p class="inwebparagraph">3. If Y is the word <code class="display"><span class="extract">debug</span></code>, then the current snippet and its colouring
|
||||
are printed out on the command line.
|
||||
</p>
|
||||
|
||||
<p class="inwebparagraph"><a id="SP25"></a><b>§25. </b>The syntax of ILDs tends to avoid superfluous quotation marks as confusing,
|
||||
but sometimes you need to be pedantic. If you want to match the text <code class="display"><span class="extract">=></span></code>,
|
||||
for example, that could lead to ambiguity with the rule marker <code class="display"><span class="extract">=></span></code>. For
|
||||
such occasions, simply put the text in double quotes, and change any literal
|
||||
double quote in it to <code class="display"><span class="extract">\"</span></code>, and use <code class="display"><span class="extract">\\</span></code> for a literal backslash. For example:
|
||||
are printed out on the command line. Thus:
|
||||
</p>
|
||||
|
||||
|
||||
<pre class="display">
|
||||
<span class="string">"keyword"</span><span class="plain"> </span><span class="reserved">=></span><span class="plain"> !</span><span class="element">reserved</span>
|
||||
<span class="reserved">colouring</span><span class="plain"> </span><span class="reserved">{</span>
|
||||
<span class="reserved">matches</span><span class="plain"> </span><span class="reserved">of</span><span class="plain"> </span><span class="functiontext">/\d\S+/</span><span class="plain"> </span><span class="reserved">{</span>
|
||||
<span class="reserved">=></span><span class="plain"> </span><span class="reserved">debug</span>
|
||||
<span class="reserved">}</span>
|
||||
<span class="reserved">}</span>
|
||||
</pre>
|
||||
|
||||
<p class="inwebparagraph"></p>
|
||||
<p class="inwebparagraph">The rule <code class="display"><span class="extract">=> debug</span></code> is unconditional, and will print whenever it's reached.
|
||||
</p>
|
||||
|
||||
<p class="inwebparagraph"><a id="SP26"></a><b>§26. Example. </b>Inweb Language Definition Format is a kind of language in itself, and in
|
||||
<p class="inwebparagraph"><a id="SP26"></a><b>§26. The worm, Ouroboros. </b>Inweb Language Definition Format is a kind of language in itself, and in
|
||||
fact Inweb is supplied with an ILD for ILDF itself, which Inweb used to
|
||||
syntax-colour the examples above. Here it is, as syntax-coloured by itself:
|
||||
</p>
|
||||
|
||||
<pre class="display">
|
||||
<span class="identifier">Name</span><span class="plain">: </span><span class="identifier">ILDF</span>
|
||||
<span class="identifier">Details</span><span class="plain">: </span><span class="identifier">The</span><span class="plain"> </span><span class="identifier">Inweb</span><span class="plain"> </span><span class="identifier">Language</span><span class="plain"> </span><span class="identifier">Definition</span><span class="plain"> </span><span class="identifier">File</span><span class="plain"> </span><span class="identifier">format</span>
|
||||
<span class="identifier">Extension</span><span class="plain">: .</span><span class="identifier">ildf</span>
|
||||
<span class="identifier">Whole</span><span class="plain"> </span><span class="identifier">Line</span><span class="plain"> </span><span class="identifier">Comment</span><span class="plain">: #</span>
|
||||
<span class="plain">Name: </span><span class="string">"ILDF"</span>
|
||||
<span class="plain">Details: </span><span class="string">"The Inweb Language Definition File format"</span>
|
||||
<span class="plain">Extension: </span><span class="string">".ildf"</span>
|
||||
<span class="plain">Whole Line Comment: </span><span class="string">"#"</span>
|
||||
<span class="plain">Supports Namespaces: </span><span class="reserved">false</span>
|
||||
|
||||
<span class="identifier">String</span><span class="plain"> </span><span class="identifier">Literal</span><span class="plain">: </span><span class="string">"\""</span>
|
||||
<span class="identifier">String</span><span class="plain"> </span><span class="identifier">Literal</span><span class="plain"> </span><span class="identifier">Escape</span><span class="plain">: \</span>
|
||||
<span class="plain">String Literal: </span><span class="string">"\""</span>
|
||||
<span class="plain">String Literal Escape: </span><span class="string">"\\"</span>
|
||||
|
||||
<span class="identifier">keyword</span><span class="plain"> </span><span class="element">unquoted</span><span class="plain"> </span><span class="identifier">of</span><span class="plain"> !</span><span class="element">element</span>
|
||||
<span class="plain">#</span><span class="comment"> Regular expressions are handled here as if character literals</span>
|
||||
<span class="plain">Character Literal: </span><span class="string">"/"</span>
|
||||
<span class="plain">Character Literal Escape: </span><span class="string">"\\"</span>
|
||||
|
||||
<span class="identifier">colouring</span><span class="plain"> </span><span class="reserved">{</span>
|
||||
<span class="plain"> </span><span class="identifier">runs</span><span class="plain"> </span><span class="identifier">of</span><span class="plain"> !</span><span class="element">identifier</span><span class="plain"> </span><span class="reserved">{</span>
|
||||
<span class="plain"> </span><span class="identifier">prefix</span><span class="plain"> ! </span><span class="reserved">=></span><span class="plain"> !</span><span class="element">element</span>
|
||||
<span class="plain"> </span><span class="identifier">keyword</span><span class="plain"> </span><span class="identifier">of</span><span class="plain"> !</span><span class="element">element</span><span class="plain"> </span><span class="reserved">=></span><span class="plain"> !</span><span class="element">element</span>
|
||||
<span class="reserved">keyword</span><span class="plain"> </span><span class="string">"both"</span>
|
||||
<span class="reserved">keyword</span><span class="plain"> </span><span class="string">"brackets"</span>
|
||||
<span class="reserved">keyword</span><span class="plain"> </span><span class="string">"characters"</span>
|
||||
<span class="reserved">keyword</span><span class="plain"> </span><span class="string">"coloured"</span>
|
||||
<span class="reserved">keyword</span><span class="plain"> </span><span class="string">"colouring"</span>
|
||||
<span class="reserved">keyword</span><span class="plain"> </span><span class="string">"debug"</span>
|
||||
<span class="reserved">keyword</span><span class="plain"> </span><span class="string">"false"</span>
|
||||
<span class="reserved">keyword</span><span class="plain"> </span><span class="string">"in"</span>
|
||||
<span class="reserved">keyword</span><span class="plain"> </span><span class="string">"instances"</span>
|
||||
<span class="reserved">keyword</span><span class="plain"> </span><span class="string">"keyword"</span>
|
||||
<span class="reserved">keyword</span><span class="plain"> </span><span class="string">"matches"</span>
|
||||
<span class="reserved">keyword</span><span class="plain"> </span><span class="string">"matching"</span>
|
||||
<span class="reserved">keyword</span><span class="plain"> </span><span class="string">"not"</span>
|
||||
<span class="reserved">keyword</span><span class="plain"> </span><span class="string">"of"</span>
|
||||
<span class="reserved">keyword</span><span class="plain"> </span><span class="string">"on"</span>
|
||||
<span class="reserved">keyword</span><span class="plain"> </span><span class="string">"optionally"</span>
|
||||
<span class="reserved">keyword</span><span class="plain"> </span><span class="string">"prefix"</span>
|
||||
<span class="reserved">keyword</span><span class="plain"> </span><span class="string">"runs"</span>
|
||||
<span class="reserved">keyword</span><span class="plain"> </span><span class="string">"spaced"</span>
|
||||
<span class="reserved">keyword</span><span class="plain"> </span><span class="string">"suffix"</span>
|
||||
<span class="reserved">keyword</span><span class="plain"> </span><span class="string">"true"</span>
|
||||
<span class="reserved">keyword</span><span class="plain"> </span><span class="string">"unquoted"</span><span class="plain"> </span><span class="reserved">of</span><span class="plain"> </span><span class="element">!element</span>
|
||||
|
||||
<span class="reserved">colouring</span><span class="plain"> </span><span class="reserved">{</span>
|
||||
<span class="plain"> </span><span class="reserved">runs</span><span class="plain"> </span><span class="reserved">of</span><span class="plain"> </span><span class="element">!identifier</span><span class="plain"> </span><span class="reserved">{</span>
|
||||
<span class="plain"> </span><span class="reserved">prefix</span><span class="plain"> </span><span class="string">"!"</span><span class="plain"> </span><span class="reserved">=></span><span class="plain"> </span><span class="element">!element</span><span class="plain"> </span><span class="reserved">on</span><span class="plain"> </span><span class="reserved">both</span>
|
||||
<span class="plain"> </span><span class="reserved">keyword</span><span class="plain"> </span><span class="reserved">of</span><span class="plain"> </span><span class="element">!element</span><span class="plain"> </span><span class="reserved">=></span><span class="plain"> </span><span class="element">!element</span>
|
||||
<span class="plain"> </span><span class="reserved">keyword</span><span class="plain"> </span><span class="reserved">of</span><span class="plain"> </span><span class="element">!reserved</span><span class="plain"> </span><span class="reserved">=></span><span class="plain"> </span><span class="element">!reserved</span>
|
||||
<span class="plain"> </span><span class="reserved">}</span>
|
||||
<span class="plain"> </span><span class="identifier">runs</span><span class="plain"> </span><span class="identifier">of</span><span class="plain"> </span><span class="element">unquoted</span><span class="plain"> </span><span class="reserved">{</span>
|
||||
<span class="plain"> </span><span class="identifier">instances</span><span class="plain"> </span><span class="identifier">of</span><span class="plain"> </span><span class="string">"=>"</span><span class="plain"> </span><span class="reserved">{</span>
|
||||
<span class="plain"> </span><span class="reserved">=></span><span class="plain"> !</span><span class="element">reserved</span>
|
||||
<span class="plain"> </span><span class="reserved">runs</span><span class="plain"> </span><span class="reserved">of</span><span class="plain"> </span><span class="element">unquoted</span><span class="plain"> </span><span class="reserved">{</span>
|
||||
<span class="plain"> </span><span class="reserved">instances</span><span class="plain"> </span><span class="reserved">of</span><span class="plain"> </span><span class="string">"=>"</span><span class="plain"> </span><span class="reserved">{</span>
|
||||
<span class="plain"> </span><span class="reserved">=></span><span class="plain"> </span><span class="element">!reserved</span>
|
||||
<span class="plain"> </span><span class="reserved">}</span>
|
||||
<span class="plain"> </span><span class="identifier">instances</span><span class="plain"> </span><span class="identifier">of</span><span class="plain"> </span><span class="string">"{"</span><span class="plain"> </span><span class="reserved">{</span>
|
||||
<span class="plain"> </span><span class="reserved">=></span><span class="plain"> !</span><span class="element">reserved</span>
|
||||
<span class="plain"> </span><span class="reserved">instances</span><span class="plain"> </span><span class="reserved">of</span><span class="plain"> </span><span class="string">"{"</span><span class="plain"> </span><span class="reserved">{</span>
|
||||
<span class="plain"> </span><span class="reserved">=></span><span class="plain"> </span><span class="element">!reserved</span>
|
||||
<span class="plain"> </span><span class="reserved">}</span>
|
||||
<span class="plain"> </span><span class="identifier">instances</span><span class="plain"> </span><span class="identifier">of</span><span class="plain"> </span><span class="string">"}"</span><span class="plain"> </span><span class="reserved">{</span>
|
||||
<span class="plain"> </span><span class="reserved">=></span><span class="plain"> !</span><span class="element">reserved</span>
|
||||
<span class="plain"> </span><span class="reserved">instances</span><span class="plain"> </span><span class="reserved">of</span><span class="plain"> </span><span class="string">"}"</span><span class="plain"> </span><span class="reserved">{</span>
|
||||
<span class="plain"> </span><span class="reserved">=></span><span class="plain"> </span><span class="element">!reserved</span>
|
||||
<span class="plain"> </span><span class="reserved">}</span>
|
||||
<span class="plain"> </span><span class="reserved">}</span>
|
||||
<span class="plain"> </span><span class="reserved">characters</span><span class="plain"> </span><span class="reserved">{</span>
|
||||
<span class="plain"> #</span><span class="comment"> Anything left of these colours will be unquoted strings, so...</span>
|
||||
<span class="plain"> </span><span class="reserved">coloured</span><span class="plain"> </span><span class="element">!constant</span><span class="plain"> </span><span class="reserved">=></span><span class="plain"> </span><span class="element">!string</span>
|
||||
<span class="plain"> </span><span class="reserved">coloured</span><span class="plain"> </span><span class="element">!identifier</span><span class="plain"> </span><span class="reserved">=></span><span class="plain"> </span><span class="element">!string</span>
|
||||
<span class="plain"> #</span><span class="comment"> Regular expressions, now coloured !character, are more like functions</span>
|
||||
<span class="plain"> </span><span class="reserved">coloured</span><span class="plain"> </span><span class="element">!character</span><span class="plain"> </span><span class="reserved">=></span><span class="plain"> </span><span class="element">!function</span>
|
||||
<span class="plain"> </span><span class="reserved">}</span>
|
||||
<span class="plain"> #</span><span class="comment"> Detect Property: Value lines, not being fooled by a colon inside quotes</span>
|
||||
<span class="plain"> </span><span class="reserved">brackets</span><span class="plain"> </span><span class="reserved">in</span><span class="plain"> </span><span class="functiontext">/\s*([A-Z][^"]*):.*/</span><span class="plain"> </span><span class="reserved">{</span>
|
||||
<span class="plain"> #</span><span class="comment"> Uncolour only the bracketed part, i.e., the Property part</span>
|
||||
<span class="plain"> </span><span class="reserved">=></span><span class="plain"> </span><span class="element">!plain</span>
|
||||
<span class="plain"> </span><span class="reserved">}</span>
|
||||
<span class="reserved">}</span>
|
||||
|
||||
</pre>
|
||||
|
|
|
@ -131,6 +131,7 @@ typedef struct match_results {
|
|||
int no_matched_texts;
|
||||
struct match_result exp_storage[MAX_BRACKETED_SUBEXPRESSIONS];
|
||||
struct text_stream *exp[MAX_BRACKETED_SUBEXPRESSIONS];
|
||||
int exp_at[MAX_BRACKETED_SUBEXPRESSIONS];
|
||||
} match_results;
|
||||
|
||||
@ Match result objects are inherently ephemeral, and we can expect to be
|
||||
|
@ -143,8 +144,10 @@ deallocate.
|
|||
/* Return a match_results value in its empty state: no matched texts, every
|exp| pointer NULL and every |exp_at| position set to -1. The structure is
returned by value; nothing is allocated here. */
match_results Regexp::create_mr(void) {
|
||||
match_results mr;
|
||||
mr.no_matched_texts = 0;
|
||||
for (int i=0; i<MAX_BRACKETED_SUBEXPRESSIONS; i++)
|
||||
for (int i=0; i<MAX_BRACKETED_SUBEXPRESSIONS; i++) {
|
||||
mr.exp[i] = NULL;
|
||||
mr.exp_at[i] = -1;
|
||||
}
|
||||
return mr;
|
||||
}
|
||||
|
||||
|
@ -169,10 +172,27 @@ int Regexp::match(match_results *mr, text_stream *text, wchar_t *pattern) {
|
|||
return rv;
|
||||
}
|
||||
|
||||
/* Attempt to match |pattern| against |text| beginning at text position |x|
rather than at the start. |mr| may be NULL, in which case no results are
prepared or disposed of here. |allow_partial| is passed through unchanged to
|Regexp::match_r|.

The return value is |match_to - x|, where |match_to| is whatever
|Regexp::match_r| returned -- presumably the text position just after the
match, making the result the number of characters matched (to be confirmed
against |match_r|). The result is 0 when |x| is not less than the length of
|text|, and also when |match_r| reports failure by returning -1. */
int Regexp::match_from(match_results *mr, text_stream *text, wchar_t *pattern,
|
||||
int x, int allow_partial) {
|
||||
int match_to = x; /* default: nothing matched, so the result will be 0 */
|
||||
if (x < Str::len(text)) {
|
||||
if (mr) Regexp::prepare(mr);
|
||||
match_position at;
|
||||
at.tpos = x; at.ppos = 0; at.bc = 0; at.bl = 0; /* start at text position x, pattern position 0 */
|
||||
match_to = Regexp::match_r(mr, text, pattern, &at, allow_partial);
|
||||
if (match_to == -1) { /* -1 is treated as "no match" */
|
||||
match_to = x;
|
||||
if (mr) Regexp::dispose_of(mr);
|
||||
}
|
||||
}
|
||||
return match_to - x;
|
||||
}
|
||||
|
||||
void Regexp::prepare(match_results *mr) {
|
||||
if (mr) {
|
||||
mr->no_matched_texts = 0;
|
||||
for (int i=0; i<MAX_BRACKETED_SUBEXPRESSIONS; i++) {
|
||||
mr->exp_at[i] = -1;
|
||||
if (mr->exp[i]) STREAM_CLOSE(mr->exp[i]);
|
||||
mr->exp_storage[i].match_text_struct =
|
||||
Streams::new_buffer(
|
||||
|
@ -276,6 +296,7 @@ to implement numeric repetition counts, which we won't need:
|
|||
Str::clear(mr->exp[i]);
|
||||
for (int j = at.brackets_start[i]; j <= at.brackets_end[i]; j++)
|
||||
PUT_TO(mr->exp[i], Str::get_at(text, j));
|
||||
mr->exp_at[i] = at.brackets_start[i];
|
||||
}
|
||||
mr->no_matched_texts = at.bc;
|
||||
}
|
||||
|
@ -290,8 +311,9 @@ says |q|, the only match is with a lower-case letter "q"), except that:
|
|||
(e) |%i| means any character from the identifier class (see above);
|
||||
(f) |%p| means any character which can be used in the name of a Preform
|
||||
nonterminal, which is to say, an identifier character or a hyphen;
|
||||
(g) |%P| means the same or else a colon.
|
||||
(h) |%t| means a tab.
|
||||
(g) |%P| means the same or else a colon;
|
||||
(h) |%t| means a tab;
|
||||
(i) |%q| means a double-quote.
|
||||
|
||||
|%| otherwise makes a literal escape; a space means any whitespace character;
|
||||
square brackets enclose literal alternatives, and note as usual with grep
|
||||
|
@ -330,9 +352,10 @@ int Regexp::get_cclass(wchar_t *pattern, int ppos, int *len, int *from, int *to,
|
|||
}
|
||||
*from = ppos; *to = ppos; return LITERAL_CLASS;
|
||||
case '[':
|
||||
*from = ppos+2;
|
||||
*from = ppos+1;
|
||||
ppos += 2;
|
||||
while ((pattern[ppos]) && (pattern[ppos] != ']')) ppos++;
|
||||
*to = ppos - 1; *len = ppos - *from + 1;
|
||||
*to = ppos - 1; *len = ppos - *from + 2;
|
||||
return LITERAL_CLASS;
|
||||
case ' ':
|
||||
*len = 1; return WHITESPACE_CLASS;
|
||||
|
@ -358,6 +381,9 @@ int Regexp::test_cclass(int c, int chcl, int range_from, int range_to, wchar_t *
|
|||
((c >= 'a') && (c <= 'z')) ||
|
||||
((c >= '0') && (c <= '9'))) match = TRUE; break;
|
||||
case LITERAL_CLASS:
|
||||
if ((range_to > range_from) && (drawn_from[range_from] == '^')) {
|
||||
range_from++; reverse = reverse?FALSE:TRUE;
|
||||
}
|
||||
for (int j = range_from; j <= range_to; j++) {
|
||||
int c1 = drawn_from[j], c2 = c1;
|
||||
if ((j+1 < range_to) && (drawn_from[j+1] == '-')) { c2 = drawn_from[j+2]; j += 2; }
|
||||
|
|
Loading…
Reference in a new issue