Largely completed baseline version of ILDF

2020-04-06 19:24:56 +01:00 · 2020-04-06 19:24:56 +01:00 · 37b01a8d89
commit 37b01a8d89
parent aeb670fc9a
29 changed files with 2170 additions and 916 deletions
--- a/Parser.w
+++ b/Parser.w
@ -389,7 +389,7 @@ division in the current section.
 			extract_mode = TRUE;
 		} else if ((current_paragraph) && (Regexp::match(&mr2, mr.exp[0], L"%(sample (%c+) code%)"))) {
 			code_lcat_for_body = TEXT_EXTRACT_LCAT;
-			code_pl_for_body = Languages::find_by_name(mr2.exp[0]);
+			code_pl_for_body = Languages::find_by_name(mr2.exp[0], W);
 			extract_mode = TRUE;
 		} else if ((current_paragraph) && (Regexp::match(&mr2, mr.exp[0], L"%(sample code%)"))) {
 			code_lcat_for_body = TEXT_EXTRACT_LCAT;
--- a/Reader.w
+++ b/Reader.w
@ -146,11 +146,11 @@ web *Reader::load_web(pathname *P, filename *alt_F, module_search *I, int verbos
 	W->analysed = FALSE;
 	W->as_ebook = NULL;
 	W->redirect_weaves_to = NULL;
-	W->main_language = Languages::default();
+	W->main_language = Languages::default(W);
 	W->no_lines = 0; W->no_paragraphs = 0; 
 	text_stream *language_name = Bibliographic::get_datum(W->md, I"Language");
 	if (Str::len(language_name) > 0)
-		W->main_language = Languages::find_by_name(language_name);
+		W->main_language = Languages::find_by_name(language_name, W);
 	main_target = Reader::add_tangle_target(W, W->main_language);

@<Initialise the rest of the chapter structure@> =
@ -159,7 +159,7 @@ web *Reader::load_web(pathname *P, filename *alt_F, module_search *I, int verbos
 	C->sections = NEW_LINKED_LIST(section);
 	C->ch_language = W->main_language;
 	if (Str::len(Cm->ch_language_name) > 0)
-		C->ch_language = Languages::find_by_name(Cm->ch_language_name);
+		C->ch_language = Languages::find_by_name(Cm->ch_language_name, W);

@<Initialise the rest of the section structure@> =
 	S->sect_extent = 0;
@ -176,10 +176,10 @@ web *Reader::load_web(pathname *P, filename *alt_F, module_search *I, int verbos
 	S->owning_web = W;
 	S->sect_language = C->ch_language;
 	if (Str::len(S->md->sect_language_name) > 0)
-		S->sect_language = Languages::find_by_name(S->md->sect_language_name);
+		S->sect_language = Languages::find_by_name(S->md->sect_language_name, W);
 	if (Str::len(S->md->sect_independent_language) > 0) {
 		programming_language *pl =
-			Languages::find_by_name(S->md->sect_independent_language);
+			Languages::find_by_name(S->md->sect_independent_language, W);
 		S->sect_language = pl;
 		S->sect_target = Reader::add_tangle_target(W, pl);
 	} else {
--- a/Weaver.w
+++ b/Weaver.w
@ -185,7 +185,7 @@ at us; but we don't weave them into the output, that's for sure.
 	} else if (Regexp::match(&mr, figname, L"(%c+) as (%c+)")) {
 		if (S->md->using_syntax < V2_SYNTAX)
 			Parser::wrong_version(S->md->using_syntax, L, "[[F as L]]", V2_SYNTAX);
-		programming_language *pl = Languages::find_by_name(mr.exp[1]);
+		programming_language *pl = Languages::find_by_name(mr.exp[1], W);
 		Formats::figure(OUT, wv, mr.exp[0], -1, -1, pl);
 	} else {
 		Formats::figure(OUT, wv, figname, -1, -1, NULL);
--- a/4/Programming
+++ b/4/Programming
@ -7,7 +7,7 @@ definitions from files.
 Programming languages are identified by name: for example, |C++| or |Perl|.

@ =
-programming_language *Languages::find_by_name(text_stream *lname) {
+programming_language *Languages::find_by_name(text_stream *lname, web *W) {
 	programming_language *pl;
 	@<If this is the name of a language already known, return that@>;
 	@<Read the language definition file with this name@>;
@ -23,21 +23,32 @@ programming_language *Languages::find_by_name(text_stream *lname) {
 			return pl;

@<Read the language definition file with this name@> =
+	filename *F = NULL;
+	if (W) {
+		pathname *P = Pathnames::subfolder(W->md->path_to_web, I"Private Languages");
+		@<Try P@>;
+	}
 	pathname *P = Languages::default_directory();
-	TEMPORARY_TEXT(leaf);
-	WRITE_TO(leaf, "%S.ildf", lname);
-	filename *F = Filenames::in_folder(P, leaf);
-	DISCARD_TEXT(leaf);
-	if (TextFiles::exists(F) == FALSE)
+	@<Try P@>;
+	if (F == NULL)
 		Errors::fatal_with_text(
 			"unsupported programming language '%S'", lname);
 	pl = Languages::read_definition(F);

+@<Try P@> =
+	if (F == NULL) { /* only look in |P| if no earlier directory supplied the file */
+		TEMPORARY_TEXT(leaf);
+		WRITE_TO(leaf, "%S.ildf", lname);
+		filename *candidate = Filenames::in_folder(P, leaf);
+		DISCARD_TEXT(leaf);
+		/* keep the candidate only if it exists; otherwise |F| remains |NULL| */
+		if (TextFiles::exists(candidate)) F = candidate;
+	}
+
@ I'm probably showing my age here.

 =
-programming_language *Languages::default(void) {
-	return Languages::find_by_name(I"C");
+programming_language *Languages::default(web *W) {
+	return Languages::find_by_name(I"C", W);
 }

 void Languages::show(OUTPUT_STREAM) {
@ -210,9 +221,9 @@ declare a reserved keyword, or set a key to a value.
 		pl->program = Languages::new_block(NULL, WHOLE_LINE_CRULE_RUN);
 		state->current_block = pl->program;
 	} else if (Regexp::match(&mr, line, L"keyword (%C+) of (%c+?)")) {
-		Languages::reserved(pl, mr.exp[0], Languages::colour(mr.exp[1], tfp), tfp);
+		Languages::reserved(pl, Languages::text(mr.exp[0], tfp, FALSE), Languages::colour(mr.exp[1], tfp), tfp);
 	} else if (Regexp::match(&mr, line, L"keyword (%C+)")) {
-		Languages::reserved(pl, mr.exp[0], RESERVED_COLOUR, tfp);
+		Languages::reserved(pl, Languages::text(mr.exp[0], tfp, FALSE), RESERVED_COLOUR, tfp);
 	} else if (Regexp::match(&mr, line, L"(%c+) *: *(%c+?)")) {
 		text_stream *key = mr.exp[0], *value = Str::duplicate(mr.exp[1]);
 		if (Str::eq(key, I"Name")) pl->language_name = Languages::text(value, tfp, TRUE);
@ -291,6 +302,12 @@ runs of a given colour, or give an if-X-then-Y rule:
 		rule->execute_block =
 			Languages::new_block(state->current_block, CHARACTERS_CRULE_RUN);
 		state->current_block = rule->execute_block;
+	} else if (Regexp::match(&mr, line, L"characters in (%c+) {")) {
+		colouring_rule *rule = Languages::new_rule(state->current_block);
+		rule->execute_block =
+			Languages::new_block(state->current_block, CHARACTERS_IN_CRULE_RUN);
+		rule->execute_block->char_set = Languages::text(mr.exp[0], tfp, FALSE);
+		state->current_block = rule->execute_block;
 	} else if (Regexp::match(&mr, line, L"runs of (%c+) {")) {
 		colouring_rule *rule = Languages::new_rule(state->current_block);
 		int r = UNQUOTED_COLOUR;
@ -302,6 +319,16 @@ runs of a given colour, or give an if-X-then-Y rule:
 		rule->execute_block = Languages::new_block(state->current_block, INSTANCES_CRULE_RUN);
 		rule->execute_block->run_instance = Languages::text(mr.exp[0], tfp, FALSE);
 		state->current_block = rule->execute_block;
+	} else if (Regexp::match(&mr, line, L"matches of (%c+) {")) {
+		colouring_rule *rule = Languages::new_rule(state->current_block);
+		rule->execute_block = Languages::new_block(state->current_block, MATCHES_CRULE_RUN);
+		Languages::regexp(rule->execute_block->match_regexp_text, mr.exp[0], tfp);
+		state->current_block = rule->execute_block;
+	} else if (Regexp::match(&mr, line, L"brackets in (%c+) {")) {
+		colouring_rule *rule = Languages::new_rule(state->current_block);
+		rule->execute_block = Languages::new_block(state->current_block, BRACKETS_CRULE_RUN);
+		Languages::regexp(rule->execute_block->match_regexp_text, mr.exp[0], tfp);
+		state->current_block = rule->execute_block;
 	} else {
 		int at = -1, quoted = FALSE;
 		for (int i=0; i<Str::len(line)-1; i++) {
@ -329,7 +356,10 @@ represents a complete program.

@d WHOLE_LINE_CRULE_RUN -1 /* This block applies to the whole snippet being coloured */
@d CHARACTERS_CRULE_RUN -2 /* This block applies to each character in turn */
-@d INSTANCES_CRULE_RUN -3 /* This block applies to each instance in turn */
+@d CHARACTERS_IN_CRULE_RUN -3 /* This block applies to each character from a set in turn */
+@d INSTANCES_CRULE_RUN -4 /* This block applies to each instance in turn */
+@d MATCHES_CRULE_RUN -5 /* This block applies to each match against a regexp in turn */
+@d BRACKETS_CRULE_RUN -6 /* This block applies to bracketed subexpressions in a regexp */

 =
 typedef struct colouring_language_block {
@ -337,6 +367,11 @@ typedef struct colouring_language_block {
 	struct colouring_language_block *parent; /* or |NULL| for the topmost one */
 	int run; /* one of the |*_CRULE_RUN| values, or else a colour */
 	struct text_stream *run_instance; /* used only for |INSTANCES_CRULE_RUN| */
+	struct text_stream *char_set; /* used only for |CHARACTERS_IN_CRULE_RUN| */
+	wchar_t match_regexp_text[MAX_ILDF_REGEXP_LENGTH]; /* used for |MATCHES_CRULE_RUN|, |BRACKETS_CRULE_RUN| */
+	
+	/* workspace during painting */
+	struct match_results mr; /* of a regular expression */
 	MEMORY_MANAGEMENT
 } colouring_language_block;

@ -347,6 +382,9 @@ colouring_language_block *Languages::new_block(colouring_language_block *within,
 	block->parent = within;
 	block->run = r;
 	block->run_instance = NULL;
+	block->char_set = NULL;
+	block->match_regexp_text[0] = 0;
+	block->mr = Regexp::create_mr();
 	return block;
 }

@ -365,13 +403,18 @@ Note that rules can be unconditional, in that the premiss always passes.
@d SPACED_RULE_SUFFIX 6 /* for |spaced suffix P| */
@d OPTIONALLY_SPACED_RULE_SUFFIX 7 /* for |optionally spaced suffix P| */

+@d MAX_ILDF_REGEXP_LENGTH 64
+
 =
 typedef struct colouring_rule {
 	/* the premiss: */
-	int match_colour; /* for |colour C|, or else |NOT_A_COLOUR| */
+	int sense; /* |FALSE| to negate the condition */
+	int match_colour; /* for |coloured C|, or else |NOT_A_COLOUR| */
 	int match_keyword_of_colour; /* for |keyword C|, or else |NOT_A_COLOUR| */
 	struct text_stream *match_text; /* or length 0 to mean "anything" */
 	int match_prefix; /* one of the |*_RULE_PREFIX| values above */
+	wchar_t match_regexp_text[MAX_ILDF_REGEXP_LENGTH];
+	int number; /* for |number N| rules; 0 for others */

 	/* the conclusion: */
 	struct colouring_language_block *execute_block; /* or |NULL|, in which case... */
@ -381,6 +424,7 @@ typedef struct colouring_rule {
 	
 	/* workspace during painting */
 	int fix_position; /* where the prefix or suffix started */
+	struct match_results mr; /* of a regular expression */
 	MEMORY_MANAGEMENT
 } colouring_rule;

@ -389,15 +433,21 @@ colouring_rule *Languages::new_rule(colouring_language_block *within) {
 	if (within == NULL) internal_error("rule outside block");
 	colouring_rule *rule = CREATE(colouring_rule);
 	ADD_TO_LINKED_LIST(rule, colouring_rule, within->rules);
+	rule->sense = TRUE;
 	rule->match_colour = NOT_A_COLOUR;
 	rule->match_text = NULL;
 	rule->match_prefix = NOT_A_RULE_PREFIX;
 	rule->match_keyword_of_colour = NOT_A_COLOUR;
+	rule->match_regexp_text[0] = 0;
+	rule->number = 0;

 	rule->set_to_colour = NOT_A_COLOUR;
 	rule->set_prefix_to_colour = NOT_A_COLOUR;
 	rule->execute_block = NULL;
 	rule->debug = FALSE;
+	
+	rule->fix_position = 0;
+	rule->mr = Regexp::create_mr();
 	return rule;
 }

@ -413,13 +463,21 @@ void Languages::parse_rule(language_reader_state *state, text_stream *premiss,
 }

@<Parse the premiss@> =
-	if (Regexp::match(&mr, premiss, L"keyword of (%c+)")) {
+	while (Regexp::match(&mr, premiss, L"not (%c+)")) {
+		rule->sense = (rule->sense)?FALSE:TRUE;
+		Str::clear(premiss); Str::copy(premiss, mr.exp[0]);
+	}
+	if (Regexp::match(&mr, premiss, L"number (%d+)")) {
+		rule->number = Str::atoi(mr.exp[0], 0);
+	} else if (Regexp::match(&mr, premiss, L"keyword of (%c+)")) {
 		rule->match_keyword_of_colour = Languages::colour(mr.exp[0], tfp);
 	} else if (Regexp::match(&mr, premiss, L"keyword")) {
 		Errors::in_text_file("ambiguous: make it keyword of !reserved or \"keyword\"", tfp);
 	} else if (Regexp::match(&mr, premiss, L"prefix (%c+)")) {
 		rule->match_prefix = UNSPACED_RULE_PREFIX;
 		rule->match_text = Languages::text(mr.exp[0], tfp, FALSE);
+	} else if (Regexp::match(&mr, premiss, L"matching (%c+)")) {
+		Languages::regexp(rule->match_regexp_text, mr.exp[0], tfp);
 	} else if (Regexp::match(&mr, premiss, L"spaced prefix (%c+)")) {
 		rule->match_prefix = SPACED_RULE_PREFIX;
 		rule->match_text = Languages::text(mr.exp[0], tfp, FALSE);
@ -435,7 +493,7 @@ void Languages::parse_rule(language_reader_state *state, text_stream *premiss,
 	} else if (Regexp::match(&mr, premiss, L"optionally spaced suffix (%c+)")) {
 		rule->match_prefix = OPTIONALLY_SPACED_RULE_SUFFIX;
 		rule->match_text = Languages::text(mr.exp[0], tfp, FALSE);
-	} else if (Regexp::match(&mr, premiss, L"colou*r (%c+)")) {
+	} else if (Regexp::match(&mr, premiss, L"coloured (%c+)")) {
 		rule->match_colour = Languages::colour(mr.exp[0], tfp);
 	} else if (Str::len(premiss) > 0) {
 		rule->match_text = Languages::text(premiss, tfp, FALSE);
@ -575,6 +633,12 @@ text_stream *Languages::text(text_stream *T, text_file_position *tfp, int allow)
 			} else if ((bareword == FALSE) && (c == '"')) {
 				Errors::in_text_file(
 					"backslash needed before internal double-quotation mark", tfp);
+			} else if ((bareword) && (c == '!') && (i == from)) {
+				Errors::in_text_file(
+					"a literal starting with ! must be in double-quotation marks", tfp);
+			} else if ((bareword) && (c == '/')) {
+				Errors::in_text_file(
+					"forward slashes can only be used in quoted strings", tfp);
 			} else if ((bareword) && (c == '"')) {
 				Errors::in_text_file(
 					"double-quotation marks can only be used in quoted strings", tfp);
@ -588,6 +652,102 @@ text_stream *Languages::text(text_stream *T, text_file_position *tfp, int allow)
 			Errors::in_text_file_S(err, tfp);
 			DISCARD_TEXT(err);			
 		}
+		if (bareword) {
+			int rw = FALSE;
+			if (Str::eq(V, I"both")) rw = TRUE;
+			if (Str::eq(V, I"brackets")) rw = TRUE;
+			if (Str::eq(V, I"characters")) rw = TRUE;
+			if (Str::eq(V, I"coloured")) rw = TRUE;
+			if (Str::eq(V, I"colouring")) rw = TRUE;
+			if (Str::eq(V, I"debug")) rw = TRUE;
+			if (Str::eq(V, I"false")) rw = TRUE;
+			if (Str::eq(V, I"in")) rw = TRUE;
+			if (Str::eq(V, I"instances")) rw = TRUE;
+			if (Str::eq(V, I"keyword")) rw = TRUE;
+			if (Str::eq(V, I"matches")) rw = TRUE;
+			if (Str::eq(V, I"matching")) rw = TRUE;
+			if (Str::eq(V, I"not")) rw = TRUE;
+			if (Str::eq(V, I"of")) rw = TRUE;
+			if (Str::eq(V, I"on")) rw = TRUE;
+			if (Str::eq(V, I"optionally")) rw = TRUE;
+			if (Str::eq(V, I"prefix")) rw = TRUE;
+			if (Str::eq(V, I"runs")) rw = TRUE;
+			if (Str::eq(V, I"spaced")) rw = TRUE;
+			if (Str::eq(V, I"suffix")) rw = TRUE;
+			if (Str::eq(V, I"true")) rw = TRUE;
+			if (Str::eq(V, I"unquoted")) rw = TRUE;
+
+			if (rw) {
+				TEMPORARY_TEXT(err);
+				WRITE_TO(err, "'%S' is a reserved word, so you should put it in double-quotation marks", V);
+				Errors::in_text_file_S(err, tfp);
+				DISCARD_TEXT(err);			
+			}
+		}
 	}
 	return V;
 }
+
+@ And regular expressions.
+
+=
+/* Translate the ILDF regular expression |T|, which must be written between
+   slashes, into the |%|-escaped form assembled by the two helper functions
+   below, and store it in |write_to|. The buffer must hold
+   |MAX_ILDF_REGEXP_LENGTH| wide characters, as the |match_regexp_text| arrays
+   do. The stored text is always null-terminated, so at most
+   |MAX_ILDF_REGEXP_LENGTH - 1| characters are kept. Problems are reported
+   against the file position |tfp|. */
+void Languages::regexp(wchar_t *write_to, text_stream *T, text_file_position *tfp) {
+	if (write_to == NULL) internal_error("no buffer");
+	write_to[0] = 0;
+	if (Str::len(T) > 0) {
+		int from = 0, to = Str::len(T)-1, x = 0;
+		if ((to > from) &&
+			(Str::get_at(T, from) == '/') && (Str::get_at(T, to) == '/')) {
+			from++; to--; /* step inside the enclosing slashes */
+			for (int i=from; i<=to; i++) {
+				wchar_t c = Str::get_at(T, i);
+				if (c == '\\') {
+					/* a backslash consumes the character after it as well */
+					wchar_t w = Str::get_at(T, i+1);
+					if (w == '\\') {
+						x = Languages::add_to_regexp(write_to, x, w);
+					} else if (w == 'd') {
+						x = Languages::add_escape_to_regexp(write_to, x, 'd');
+					} else if (w == 't') {
+						x = Languages::add_escape_to_regexp(write_to, x, 't');
+					} else if (w == 's') {
+						x = Languages::add_to_regexp(write_to, x, ' ');
+					} else if (w == 'S') {
+						x = Languages::add_escape_to_regexp(write_to, x, 'C');
+					} else if (w == '"') {
+						x = Languages::add_escape_to_regexp(write_to, x, 'q');
+					} else {
+						x = Languages::add_escape_to_regexp(write_to, x, w);
+					}
+					i++;
+					continue;
+				}
+				if (c == '.') {
+					x = Languages::add_escape_to_regexp(write_to, x, 'c');
+					continue;
+				}
+				if (c == '%') {
+					x = Languages::add_escape_to_regexp(write_to, x, '%');
+					continue;
+				}
+				x = Languages::add_to_regexp(write_to, x, c);
+			}
+		} else {
+			Errors::in_text_file(
+				"the expression to match must be in slashes '/'", tfp);
+		}
+		/* the helpers return |MAX_ILDF_REGEXP_LENGTH| once anything failed to fit */
+		if (x >= MAX_ILDF_REGEXP_LENGTH)
+			Errors::in_text_file(
+				"the expression to match is too long", tfp);
+	}
+}
+
+/* Append the character |c| at index |i| of |write_to|, keeping the text
+   null-terminated, and return the new length. If there is no room for both
+   |c| and the terminator, nothing is written and |MAX_ILDF_REGEXP_LENGTH| is
+   returned; every later call then also returns it, so the overflow is sticky. */
+int Languages::add_to_regexp(wchar_t *write_to, int i, wchar_t c) {
+	if (i < MAX_ILDF_REGEXP_LENGTH - 1) {
+		write_to[i] = c;
+		write_to[i+1] = 0;
+		return i+1;
+	}
+	return MAX_ILDF_REGEXP_LENGTH;
+}
+
+/* Append the two-character escape |%| followed by |c|. The pair is written
+   either whole or not at all, so that a truncated expression can never end in
+   a dangling |%|; on overflow |MAX_ILDF_REGEXP_LENGTH| is returned. */
+int Languages::add_escape_to_regexp(wchar_t *write_to, int i, wchar_t c) {
+	if (i + 2 >= MAX_ILDF_REGEXP_LENGTH) return MAX_ILDF_REGEXP_LENGTH;
+	i = Languages::add_to_regexp(write_to, i, '%');
+	i = Languages::add_to_regexp(write_to, i, c);
+	return i;
+}
--- a/Painter.w
+++ b/Painter.w
@ -207,24 +207,53 @@ void Painter::execute(hash_table *HT, colouring_language_block *block, text_stre
 	LOOP_OVER_LINKED_LIST(rule, colouring_rule, block->rules) {
 		switch (block->run) {
 			case WHOLE_LINE_CRULE_RUN:
-				Painter::execute_rule(HT, rule, matter, colouring, from, to);
+				Painter::execute_rule(HT, rule, matter, colouring, from, to, 1);
 				break;
 			case CHARACTERS_CRULE_RUN:
 				for (int i=from; i<=to; i++)
-					Painter::execute_rule(HT, rule, matter, colouring, i, i);
+					Painter::execute_rule(HT, rule, matter, colouring, i, i, i-from+1);
+				break;
+			case CHARACTERS_IN_CRULE_RUN:
+				for (int count=1, i=from; i<=to; i++)
+					for (int j=0; j<Str::len(block->char_set); j++)
+						if (Str::get_at(matter, i) == Str::get_at(block->char_set, j) ) {
+							Painter::execute_rule(HT, rule, matter, colouring, i, i, count++);
+							break;
+						}
 				break;
 			case INSTANCES_CRULE_RUN: {
 				int L = Str::len(block->run_instance) - 1;
 				if (L >= 0)
-					for (int i=from; i<=to - L; i++)
+					for (int count=1, i=from; i<=to - L; i++)
 						if (ACMESupport::text_at(matter, i, block->run_instance)) {
-							Painter::execute_rule(HT, rule, matter, colouring, i, i+L);
+							Painter::execute_rule(HT, rule, matter, colouring, i, i+L, count++);
 							i += L;
 						}
 				break;
 			}
+			case MATCHES_CRULE_RUN:
+				for (int count=1, i=from; i<=to; i++) {
+					int L = Regexp::match_from(&(block->mr), matter, block->match_regexp_text, i, TRUE);
+					if (L > 0) {
+						Painter::execute_rule(HT, rule, matter, colouring, i, i+L-1, count++);
+						i += L-1;
+					}
+				}
+				break;
+			case BRACKETS_CRULE_RUN:
+				for (int i=0; i<MAX_BRACKETED_SUBEXPRESSIONS; i++)
+					if (block->mr.exp[i])
+						Str::clear(block->mr.exp[i]);
+				if (Regexp::match(&(block->mr), matter, block->match_regexp_text))
+					for (int count=1, i=0; i<MAX_BRACKETED_SUBEXPRESSIONS; i++)
+						if (block->mr.exp_at[i] >= 0)
+							Painter::execute_rule(HT, rule, matter, colouring,
+								block->mr.exp_at[i],
+								block->mr.exp_at[i] + Str::len(block->mr.exp[i])-1,
+								count++);
+				break;
 			default: {
-				int ident_from = -1;
+				int ident_from = -1, count = 1;
 				for (int i=from; i<=to; i++) {
 					int col = Str::get_at(colouring_at_start, i);
 					if ((col == block->run) ||
@ -233,12 +262,12 @@ void Painter::execute(hash_table *HT, colouring_language_block *block, text_stre
 						if (ident_from == -1) ident_from = i;
 					} else {
 						if (ident_from >= 0)
-							Painter::execute_rule(HT, rule, matter, colouring, ident_from, i-1);
+							Painter::execute_rule(HT, rule, matter, colouring, ident_from, i-1, count++);
 						ident_from = -1;
 					}
 				}
 				if (ident_from >= 0)
-					Painter::execute_rule(HT, rule, matter, colouring, ident_from, to);
+					Painter::execute_rule(HT, rule, matter, colouring, ident_from, to, count++);
 				break;
 			}
 		}
@ -250,8 +279,8 @@ void Painter::execute(hash_table *HT, colouring_language_block *block, text_stre

 =
 void Painter::execute_rule(hash_table *HT, colouring_rule *rule, text_stream *matter,
-	text_stream *colouring, int from, int to) {
-	if (Painter::satisfies(HT, rule, matter, colouring, from, to))
+	text_stream *colouring, int from, int to, int N) {
+	if (Painter::satisfies(HT, rule, matter, colouring, from, to, N) == rule->sense)
 		Painter::follow(HT, rule, matter, colouring, from, to);
 }

@ -267,8 +296,13 @@ void Painter::execute_rule(hash_table *HT, colouring_rule *rule, text_stream *ma

 =
 int Painter::satisfies(hash_table *HT, colouring_rule *rule, text_stream *matter,
-	text_stream *colouring, int from, int to) {
-	if (Str::len(rule->match_text) > 0) {
+	text_stream *colouring, int from, int to, int N) {
+	if (rule->number > 0) {
+		if (rule->number != N) return FALSE;
+	} else if (rule->match_regexp_text[0]) {
+		if (Regexp::match(&(rule->mr), matter, rule->match_regexp_text) == FALSE)
+			return FALSE;
+	} else if (Str::len(rule->match_text) > 0) {
 		if ((rule->match_prefix == UNSPACED_RULE_PREFIX) ||
 			(rule->match_prefix == SPACED_RULE_PREFIX) ||
 			(rule->match_prefix == OPTIONALLY_SPACED_RULE_PREFIX)) {
@ -295,7 +329,11 @@ int Painter::satisfies(hash_table *HT, colouring_rule *rule, text_stream *matter
 				return FALSE;
 			rule->fix_position = pos;
 		} else {
-			if (Str::ne(matter, rule->match_text)) return FALSE;
+			if (Str::len(rule->match_text) != to-from+1)
+				return FALSE;
+			for (int i=from; i<=to; i++)
+				if (Str::get_at(matter, i) != Str::get_at(rule->match_text, i-from))
+					return FALSE;
 		}
 	} else if (rule->match_keyword_of_colour != NOT_A_COLOUR) {
 		TEMPORARY_TEXT(id);
--- a/Languages/ACME.ildf
+++ b/Languages/ACME.ildf
@ -1,14 +1,14 @@
-Name: ACME
-Details: The ACME assembly language for 6502 and related CPUs
-Extension: .a
-Line Comment: ;
+Name: "ACME"
+Details: "The ACME assembly language for 6502 and related CPUs"
+Extension: ".a"
+Line Comment: ";"
 String Literal: "\""
-String Literal Escape: \
-Character Literal: '
-Character Literal Escape: \
-Binary Literal Prefix: %
-Hexadecimal Literal Prefix: $
-Negative Literal Prefix: -
+String Literal Escape: "\\"
+Character Literal: "'"
+Character Literal Escape: "\\"
+Binary Literal Prefix: "%"
+Hexadecimal Literal Prefix: "$"
+Negative Literal Prefix: "-"

 colouring {
 	runs of unquoted {
--- a/Languages/C++.ildf
+++ b/Languages/C++.ildf
@ -1,34 +1,34 @@
-Name: C++
-Details: The C++ programming language
-Extension: .cpp
-Multiline Comment Open: /*
-Multiline Comment Close: */
-Line Comment: //
+Name: "C++"
+Details: "The C++ programming language"
+Extension: ".cpp"
+Multiline Comment Open: "/*"
+Multiline Comment Close: "*/"
+Line Comment: "//"
 String Literal: "\""
-String Literal Escape: \
-Character Literal: '
-Character Literal Escape: \
+String Literal Escape: "\\"
+Character Literal: "'"
+Character Literal Escape: "\\"
 C-Like: true

 # C++ does in fact support octal literals, marking them as starting with an
 # unnecessary initial zero. This is practically obsolete now, and in any case
 # makes no difference to syntax-colouring.

-Hexadecimal Literal Prefix: 0x
-Binary Literal Prefix: 0b
-Negative Literal Prefix: -
+Hexadecimal Literal Prefix: "0x"
+Binary Literal Prefix: "0b"
+Negative Literal Prefix: "-"

-Before Named Paragraph Expansion: \n{\n
-After Named Paragraph Expansion: }\n
-Start Ifdef: #ifdef %S\n
-End Ifdef: #endif /* %S */\n
-Start Ifndef: #ifndef %S\n
-End Ifndef: #endif /* %S */\n
+Before Named Paragraph Expansion: "\n{\n"
+After Named Paragraph Expansion: "}\n"
+Start Ifdef: "#ifdef %S\n"
+End Ifdef: "#endif /* %S */\n"
+Start Ifndef: "#ifndef %S\n"
+End Ifndef: "#endif /* %S */\n"
 Line Marker: "#line %d \"%f\"\n"

-Start Definition: #define %S\s
-Prolong Definition: \\\n\s\s\s\s
-End Definition: \n
+Start Definition: "#define %S\s"
+Prolong Definition: "\\\n\s\s\s\s"
+End Definition: "\n"

 keyword auto
 keyword break
--- a/Languages/C.ildf
+++ b/Languages/C.ildf
@ -1,13 +1,13 @@
-Name: C
-Details: The C programming language
-Extension: .c
-Multiline Comment Open: /*
-Multiline Comment Close: */
-Line Comment: //
+Name: "C"
+Details: "The C programming language"
+Extension: ".c"
+Multiline Comment Open: "/*"
+Multiline Comment Close: "*/"
+Line Comment: "//"
 String Literal: "\""
-String Literal Escape: \
-Character Literal: '
-Character Literal Escape: \
+String Literal Escape: "\\"
+Character Literal: "'"
+Character Literal Escape: "\\"
 C-Like: true

 # C does in fact support octal literals, marking them as starting with an
@ -16,21 +16,21 @@ C-Like: true
 # rejected by the C standards body as useless, but are so useful that gcc
 # and clang support them anyway.

-Hexadecimal Literal Prefix: 0x
-Binary Literal Prefix: 0b
-Negative Literal Prefix: -
+Hexadecimal Literal Prefix: "0x"
+Binary Literal Prefix: "0b"
+Negative Literal Prefix: "-"

-Before Named Paragraph Expansion: \n{\n
-After Named Paragraph Expansion: }\n
-Start Ifdef: #ifdef %S\n
-End Ifdef: #endif /* %S */\n
-Start Ifndef: #ifndef %S\n
-End Ifndef: #endif /* %S */\n
+Before Named Paragraph Expansion: "\n{\n"
+After Named Paragraph Expansion: "}\n"
+Start Ifdef: "#ifdef %S\n"
+End Ifdef: "#endif /* %S */\n"
+Start Ifndef: "#ifndef %S\n"
+End Ifndef: "#endif /* %S */\n"
 Line Marker: "#line %d \"%f\"\n"

-Start Definition: #define %S\s
-Prolong Definition: \\\n\s\s\s\s
-End Definition: \n
+Start Definition: "#define %S\s"
+Prolong Definition: "\\\n\s\s\s\s"
+End Definition: "\n"

 keyword auto
 keyword break
--- a/Languages/ILDF.ildf
+++ b/Languages/ILDF.ildf
@ -1,17 +1,44 @@
-Name: ILDF
-Details: The Inweb Language Definition File format
-Extension: .ildf
-Whole Line Comment: #
+Name: "ILDF"
+Details: "The Inweb Language Definition File format"
+Extension: ".ildf"
+Whole Line Comment: "#"
+Supports Namespaces: false

 String Literal: "\""
-String Literal Escape: \
+String Literal Escape: "\\"

-keyword unquoted of !element
+# Regular expressions are handled here as if character literals
+Character Literal: "/"
+Character Literal Escape: "\\"
+
+keyword "both"
+keyword "brackets"
+keyword "characters"
+keyword "coloured"
+keyword "colouring"
+keyword "debug"
+keyword "false"
+keyword "in"
+keyword "instances"
+keyword "keyword"
+keyword "matches"
+keyword "matching"
+keyword "not"
+keyword "of"
+keyword "on"
+keyword "optionally"
+keyword "prefix"
+keyword "runs"
+keyword "spaced"
+keyword "suffix"
+keyword "true"
+keyword "unquoted" of !element

 colouring {
 	runs of !identifier {
-		prefix ! => !element
+		prefix "!" => !element on both
 		keyword of !element => !element
+		keyword of !reserved => !reserved
 	}
 	runs of unquoted {
 		instances of "=>" {
@ -24,4 +51,16 @@ colouring {
 			=> !reserved
 		}
 	}
+	characters {
+		# Anything left of these colours will be unquoted strings, so...
+		coloured !constant => !string
+		coloured !identifier => !string
+		# Regular expressions, now coloured !character, are more like functions
+		coloured !character => !function
+	}
+	# Detect Property: Value lines, not being fooled by a colon inside quotes
+	brackets in /\s*([A-Z][^"]*):.*/ {
+		# Uncolour only the bracketed part, i.e., the Property part
+		=> !plain
+	}
 }
--- a/Languages/InC.ildf
+++ b/Languages/InC.ildf
@ -1,14 +1,14 @@
-Name: InC
-Details: The Inform-tools extension to the C programming language
-Extension: .c
+Name: "InC"
+Details: "The Inform-tools extension to the C programming language"
+Extension: ".c"
 Supports Namespaces: true
-Multiline Comment Open: /*
-Multiline Comment Close: */
-Line Comment: //
+Multiline Comment Open: "/*"
+Multiline Comment Close: "*/"
+Line Comment: "//"
 String Literal: "\""
-String Literal Escape: \
-Character Literal: '
-Character Literal Escape: \
+String Literal Escape: "\\"
+Character Literal: "'"
+Character Literal Escape: "\\"
 C-Like: true

 # C does in fact support octal literals, marking them as starting with an
@ -17,30 +17,28 @@ C-Like: true
 # rejected by the C standards body as useless, but are so useful that gcc
 # and clang support them anyway.

-Hexadecimal Literal Prefix: 0x
-Binary Literal Prefix: 0b
-Negative Literal Prefix: -
+Hexadecimal Literal Prefix: "0x"
+Binary Literal Prefix: "0b"
+Negative Literal Prefix: "-"

 # The "shebang" routine for a language is called to add anything it wants to
 # at the very top of the tangled code. (For a scripting language such as
-# Perl or Python, that might be a shebang: hence the name.)
+# Perl or Python, that might be a shebang: hence the name.)
 # But we will use it to defime the constant PLATFORM_POSIX everywhere except
 # Windows. This needs to happen right at the top, because the "very early
 # code" in a tangle may contain material conditional on whether it is defined.

-Shebang: #ifndef PLATFORM_WINDOWS\n#define PLATFORM_POSIX\n#endif\n
-
-Before Named Paragraph Expansion: \n{\n
-After Named Paragraph Expansion: }\n
-Start Ifdef: #ifdef %S\n
-End Ifdef: #endif /* %S */\n
-Start Ifndef: #ifndef %S\n
-End Ifndef: #endif /* %S */\n
+Shebang: "#ifndef PLATFORM_WINDOWS\n#define PLATFORM_POSIX\n#endif\n"
+Before Named Paragraph Expansion: "\n{\n"
+After Named Paragraph Expansion: "}\n"
+Start Ifdef: "#ifdef %S\n"
+End Ifdef: "#endif /* %S */\n"
+Start Ifndef: "#ifndef %S\n"
+End Ifndef: "#endif /* %S */\n"
 Line Marker: "#line %d \"%f\"\n"
-
-Start Definition: #define %S\s
-Prolong Definition: \\\n\s\s\s\s
-End Definition: \n
+Start Definition: "#define %S\s"
+Prolong Definition: "\\\n\s\s\s\s"
+End Definition: "\n"

 # FILE gets in even though it's not technically reserved but only a type
 # name, defined in the standard C library.
--- a/Languages/Inform
+++ b/Languages/Inform
@ -1,22 +1,22 @@
-Name: Inform 6
-Details: The C-like interactive fiction language Inform 6
-Extension: .i6
-Line Comment: !
+Name: "Inform 6"
+Details: "The C-like interactive fiction language Inform 6"
+Extension: ".i6"
+Line Comment: "!"
 String Literal: "\""
-String Literal Escape: \
-Character Literal: '
-Character Literal Escape: \
-Binary Literal Prefix: $$
-Hexadecimal Literal Prefix: $
-Negative Literal Prefix: -
+String Literal Escape: "\\"
+Character Literal: "'"
+Character Literal Escape: "\\"
+Binary Literal Prefix: "$$"
+Hexadecimal Literal Prefix: "$"
+Negative Literal Prefix: "-"

-Start Definition: Constant %S =\s
-End Definition: ;\n
+Start Definition: "Constant %S =\s"
+End Definition: ";\n"

-Start Ifdef: #ifdef %S;\n
-End Ifdef: #endif; ! %S\n
-Start Ifndef: #ifndef %S;\n
-End Ifndef: #endif; ! %S\n
+Start Ifdef: "#ifdef %S;\n"
+End Ifdef: "#endif; ! %S\n"
+Start Ifndef: "#ifndef %S;\n"
+End Ifndef: "#endif; ! %S\n"

 # Reserved words:

--- a/Languages/Inform
+++ b/Languages/Inform
@ -1,8 +1,8 @@
-Name: Inform 7
-Details: The natural-language based language Inform 7
-Extension: .i7x
-Multiline Comment Open: [
-Multiline Comment Close: ]
+Name: "Inform 7"
+Details: "The natural-language based language Inform 7"
+Extension: ".i7x"
+Multiline Comment Open: "["
+Multiline Comment Close: "]"
 String Literal: "\""

 # This is here so that tangling the Standard Rules extension doesn't insert
--- a/Languages/None.ildf
+++ b/Languages/None.ildf
@ -1,3 +1,3 @@
-Name: None
-Details: For programs in languages not yet supported by Inweb
-Extension: .txt
+Name: "None"
+Details: "For programs in languages not yet supported by Inweb"
+Extension: ".txt"
--- a/Languages/Perl.ildf
+++ b/Languages/Perl.ildf
@ -1,17 +1,17 @@
-Name: Perl
-Details: The scripting language Perl 5
-Extension: .pl
-Line Comment: #
+Name: "Perl"
+Details: "The scripting language Perl 5"
+Extension: ".pl"
+Line Comment: "#"
 String Literal: "\""
-String Literal Escape: \
-Character Literal: '
-Character Literal Escape: \
+String Literal Escape: "\\"
+Character Literal: "'"
+Character Literal Escape: "\\"

-Shebang: #!/usr/bin/perl\n\n
-Before Named Paragraph Expansion: \n{\n
-After Named Paragraph Expansion: }\n
-Start Definition: %S =
-End Definition: \n;\n
+Shebang: "#!/usr/bin/perl\n\n"
+Before Named Paragraph Expansion: "\n{\n"
+After Named Paragraph Expansion: "}\n"
+Start Definition: "%S ="
+End Definition: "\n;\n"

 # In its usual zany way, Perl recognises the same #line syntax as C, thus in
 # principle overloading its comment notation #:
--- a/Languages/Plain
+++ b/Languages/Plain
@ -1,6 +1,6 @@
-Name: Plain Text
-Details: For text files which are not programs
-Extension: .txt
+Name: "Plain Text"
+Details: "For text files which are not programs"
+Extension: ".txt"

 colouring {
 	=> !plain
--- a/Manual/Supporting
+++ b/Manual/Supporting
@ -46,7 +46,9 @@ This section of the manual is about how to do it.

 Once you have written a definition, use |-read-language L| at the command
 line, where |L| is the file defining it. If you have many custom languages,
-|-read-languages D| reads all of the definitions in a directory |D|.
+|-read-languages D| reads all of the definitions in a directory |D|. Or, if
+the language in question is really quite specific to a single web, you can
+make a |Private Languages| subdirectory of the web and put it in there.

@h Structure of language definitions.
 Each language is defined by a single ILDF file. ("Inweb Language Definition
@ -57,7 +59,7 @@ trailing whitespace on each line is ignored; blank lines are ignored; and
 so are comments, which are lines beginning with a |#| character.

 The ILD contains three sorts of thing:
-(a) Properties, set by lines in the form |Name: C++|.
+(a) Properties, set by lines in the form |Name: "C++"|.
 (b) Keywords, set by lines in the form |keyword int|.
 (c) A colouring program, introduced by |colouring {| and continuing until the
 last block of it is closed with a |}|.
@ -66,15 +68,15 @@ Everything in an ILD is optional, so a minimal ILD is in principle empty. In
 practice, though, every ILD should open like so:

 = (sample ILDF code)
-Name: C
-Details: The C programming language
-Extension: .c
+Name: "C"
+Details: "The C programming language"
+Extension: ".c"

@h Properties.
 Inevitably, there's a miscellaneous shopping list of these, but let's start
 with the semi-compulsory ones.

-|Name|. This is the one used by webs in their |Language: X| lines, and should
+|Name|. This is the one used by webs in their |Language: "X"| lines, and should
 match the ILD's own filename: wherever it is stored, the ILD for language |X|
 should be filenamed |X.ildf|.

@ -102,9 +104,9 @@ as a pair or not at all, is the notation for multiline comments.
 For example, C defines:

 = (sample ILDF code)
-    Multiline Comment Open: /*
-    Multiline Comment Close: */
-    Line Comment: //
+    Multiline Comment Open: "/*"
+    Multiline Comment Close: "*/"
+    Line Comment: "//"

@ As noted, comments occur only outside of string or character literals. We
 can give notations for these as follows:
@ -121,9 +123,9 @@ Here, C defines:

 = (sample ILDF code)
    String Literal: "\""
-    String Literal Escape: \
-    Character Literal: '
-    Character Literal Escape: \
+    String Literal Escape: "\\"
+    Character Literal: "'"
+    Character Literal Escape: "\\"

@ Next, numeric literals, like |0xFE45| in C, or |$$10011110| in Inform 6.
 It's assumed that every language allows non-negative decimal numbers.
@ -136,16 +138,16 @@ are notations for non-decimal numbers, if they exist.
 Here, C has:

 = (sample ILDF code)
-    Hexadecimal Literal Prefix: 0x
-    Binary Literal Prefix: 0b
-    Negative Literal Prefix: -
+    Hexadecimal Literal Prefix: "0x"
+    Binary Literal Prefix: "0b"
+    Negative Literal Prefix: "-"

@ |Shebang| is used only in tangling, and is a (probably short) text added at
 the very beginning of a tangled program. This is useful for scripting languages
 in Unix, where the opening line must be a "shebang" indicating their language.
 For example, Perl defines:
 = (sample ILDF code)
-    Shebang: #!/usr/bin/perl\n\n
+    Shebang: "#!/usr/bin/perl\n\n"
 =
 Most languages do not have a shebang.

@ -170,8 +172,8 @@ matter added. This material is in |Before Named Paragraph Expansion| and

 For C and all similar languages, we recommend this:
 = (sample ILDF code)
-    Before Named Paragraph Expansion: \n{\n
-    After Named Paragraph Expansion: }\n
+    Before Named Paragraph Expansion: "\n{\n"
+    After Named Paragraph Expansion: "}\n"
 =
 The effect of this is to ensure that code such as:
 = (not code)
@ -200,12 +202,12 @@ It can only do so if the language provides a notation for that.
 continue a multiline definition (if they are allowed); and |End Definition|,
 if given, places any ending notation. For example, Inform 6 defines:
 = (sample ILDF code)
-    Start Definition: Constant %S =\s
-    End Definition: ;\n
+    Start Definition: "Constant %S =\s"
+    End Definition: ";\n"
 =
 where |%S| expands to the name of the term to be defined. Thus, we might tangle
 out to:
-= (sample ILDF code)
+= (not code)
    Constant TAXICAB = 1729;\n
 =
 Inweb ignores all definitions unless one of these three properties is given.
@ -216,10 +218,10 @@ makes use of this to handle code dependent on the operating system in use.
 If the language supports it, the notation is in |Start Ifdef| and |End Ifdef|,
 and in |Start Ifndef| and |End Ifndef|. For example, Inform 6 has:
 = (sample ILDF code)
-    Start Ifdef: #ifdef %S;\n
-    End Ifdef: #endif; ! %S\n
-    Start Ifndef: #ifndef %S;\n
-    End Ifndef: #endif; ! %S\n
+    Start Ifdef: "#ifdef %S;\n"
+    End Ifdef: "#endif; ! %S\n"
+    Start Ifndef: "#ifndef %S;\n"
+    End Ifndef: "#endif; ! %S\n"
 =
 which is a subtly different notation from the C one. Again, |%S| expands to
 the name of the term we are conditionally compiling on.
@ -310,88 +312,215 @@ block, that's a line of source code. Blocks normally contain one or more
 "rules":
 = (sample ILDF code)
    colouring {
-        marble => !extract
+        marble => !function
    }
 =
 Rules take the form of "if X, then Y", and the |=>| divides the X from the Y.
 This one says that if the snippet consists of the word "marble", then colour
-it |!extract|. Of course this is not very useful, since it would only catch
+it |!function|. Of course this is not very useful, since it would only catch
 lines containing only that one word. So we really want to narrow in on smaller
-snippets:
+snippets. This, for example, applies its rule to each individual character
+in turn:
 = (sample ILDF code)
    colouring {
        characters {
-            X => !extract
+            K => !identifier
        }
    }
 =
-The effect of the |characters {| ... |}| block is to apply all its rules to
-each character of the snippet owning it. Inside the block, then, the snippet
-is always just a single character, and our rule tells us to paint the letter X
-wherever it occurs.

-@ The block |instances of X| narrows in on each usage of the text |X| inside
+@ In the above examples, |K| and |marble| appeared without quotation marks,
+but they were only allowed to do that because (a) they were single words,
+(b) those words had no other meaning, and (c) they didn't contain any
+awkward characters. For any more complicated texts, always use quotation
+marks. For example, in
+= (sample ILDF code)
+	"=>" => !reserved
+=
+the |=>| in quotes is just text, whereas the one outside quotes is being
+used to divide a rule.
+
+If you need a literal double quote inside the double-quotes, use |\"|; and
+use |\\| for a literal backslash. For example:
+= (sample ILDF code)
+    "\\\"" => !reserved
+=
+actually matches the text |\"|.
+
+@h The six splits.
+|characters| is an example of a "split", which splits up the original snippet
+of text -- say, the line |let K = 2| -- into smaller, non-overlapping snippets
+-- in this case, nine of them: |l|, |e|, |t|, | |, |K|, | |, |=|, | |, and |2|.
+Every split is followed by a block of rules, which is applied to each of the
+pieces in turn. Inweb works sideways-first: thus, if the block contains rules
+R1, R2, ..., then R1 is applied to each piece first, then R2 to each piece,
+and so on.
+
+There are several different ways to split, all of them written in the
+plural, to emphasize that they work on what are usually multiple things.
+Rules, on the other hand, are written in the singular. Splits are not allowed
+to be followed by |=>|: they always begin a block.
+
+1. |characters| splits the snippet into each of its characters.
+
+2. |characters in T| splits the snippet into each of its characters which
+lie inside the text |T|. For example, here is a not very useful ILD for
+plain text in which all vowels are in red:
+
+[[../Private Languages/VowelsExample.ildf as ILDF]]
+
+Given the text:
+= (not code)
+A noir, E blanc, I rouge, U vert, O bleu : voyelles,
+Je dirai quelque jour vos naissances latentes :
+A, noir corset velu des mouches éclatantes
+Qui bombinent autour des puanteurs cruelles,
+=
+this produces:
+= (sample VowelsExample code)
+A noir, E blanc, I rouge, U vert, O bleu : voyelles,
+Je dirai quelque jour vos naissances latentes :
+A, noir corset velu des mouches éclatantes
+Qui bombinent autour des puanteurs cruelles,
+=
+
+3. The split |instances of X| narrows in on each usage of the text |X| inside
 the snippet. For example,
-= (sample ILDF code)
-    colouring {
-        instances of == {
-            => !reserved
-        }
-    }
+[[../Private Languages/LineageExample.ildf as ILDF]]
+acts on the text:
+= (not code)
+Jacob first appears in the Book of Genesis, the son of Isaac and Rebecca, the
+grandson of Abraham, Sarah and Bethuel, the nephew of Ishmael.
 =
-gives every usage of |==| the colour |!reserved|. Note that it never runs in
-an overlapping way: the snippet |===| would be considered as having only one
-instance of |==| (the first two characters), while |====| would have two.
+to produce:
+= (sample LineageExample code)
+Jacob first appears in the Book of Genesis, the son of Isaac and Rebecca, the
+grandson of Abraham, Sarah and Bethuel, the nephew of Ishmael.
+=
+Note that it never runs in an overlapping way: the snippet |===| would be
+considered as having only one instance of |==| (the first two characters),
+while |====| would have two.

-@ Another kind of block is |runs of C|, where |C| is a colour. For example:
-= (sample ILDF code)
-    colouring {
-        runs of !identifier {
-            printf => !function
-            sscanf => !function
-        }
-    }
+4. The split |runs of C|, where |C| describes a colour, splits the snippet
+into non-overlapping contiguous pieces which have that colour. For example:
+[[../Private Languages/RunningExample.ildf as ILDF]]
+acts on:
+= (not code)
+Napoleon Bonaparte (1769-1821) took 167 scientists to Egypt in 1798,
+who published their so-called Memoirs over the period 1798-1801.
 =
-If this runs on the line |if (x == 1) printf("Hello!");|, then the inner
-block will run three times: its snippet will be |if|, then |x|, then |printf|.
-The rules inside the block will take effect only on the third time, when it
-will paint the word |printf| in |!function| colour.
+to produce:
+= (sample RunningExample code)
+Napoleon Bonaparte (1769-1821) took 167 scientists to Egypt in 1798,
+who published their so-called Memoirs over the period 1798-1801.
+=
+Here the hyphens in number ranges have been coloured, but not the hyphen
+in "so-called".
+
+A more computer-science sort of example would be:
+[[../Private Languages/StdioExample.ildf as ILDF]]
+which acts on:
+= (not code)
+if (x == 1) printf("Hello!");
+=
+to produce:
+= (sample StdioExample code)
+if (x == 1) printf("Hello!");
+=
+The split divides the line up into three runs, and the inner block runs three
+times: on |if|, then |x|, then |printf|. Only the third time has any effect.

 As a special form, |runs of unquoted| means "runs of characters not painted
 either with |!string| or |!character|". This is special because |unquoted| is
 not a colour.

-@ It remains to specify what rules can do. As noted, they take the form
-"if X, then Y". The following are the possibilities for X, the condition:
+5. The split |matches of /E/|, where |/E/| is a regular expression (see below),
+splits the snippet up into non-overlapping pieces which match it: possibly
+none at all, of course, in which case the block of rules is never used.
+This is easier to demonstrate than explain:
+[[../Private Languages/AssemblageExample.ildf as ILDF]]
+which acts on:
+= (not code)
+		JSR .initialise
+		LDR A, #.data
+		RTS
+	.initialise
+		TAX
+=
+to produce:
+= (sample AssemblageExample code)
+		JSR .initialise
+		LDR A, #.data
+		RTS
+	.initialise
+		TAX
+=

-1. X can be omitted altogether, and then the rule always applies. For example,
-this somewhat nihilistic program gets rid of colouring entirely:
+6. Lastly, the split |brackets in /E/| matches the snippet against the
+regular expression |E|, and then runs the rules on each bracketed
+subexpression in turn. (If there is no match, or there are no bracketed
+terms in |E|, nothing happens.)
+[[../Private Languages/EquationsExample.ildf as ILDF]]
+acts on:
+= (not code)
+	A = 2716
+	B=3
+	C =715 + B
+	D < 14
+=
+to produce:
+= (sample EquationsExample code)
+	A = 2716
+	B=3
+	C =715 + B
+	D < 14
+=
+What happens here is that the expression has two bracketed terms, one for
+the letter, one for the number; the rule is run first on the letter, then
+on the number, and both are turned to |!function|.
+
+@h The seven ways rules can apply.
+Rules are the lines with a |=>| in. As noted, they take the form "if X, then
+Y". The following are the possibilities for X, the condition.
+
+1. The easiest thing is to give nothing at all, and then the rule always
+applies. For example, this somewhat nihilistic program gets rid of colouring
+entirely:
 = (sample ILDF code)
    colouring {
        => !plain
    }
 =
-2. X can require the whole snippet to be of a particular colour, by writing
-|colour C|. For example:
+
+2. If X is a piece of literal text, the rule applies when the snippet is
+exactly that text. For example,
+= (sample ILDF code)
+    printf => !function
+=
+
+3. X can require the whole snippet to be of a particular colour, by writing
+|coloured C|. For example:
 = (sample ILDF code)
    colouring {
        characters {
-            colour !character => !plain
+            coloured !character => !plain
        }
    }
 =
 removes the syntax colouring on character literals.

-3. X can require the snippet to be one of the language's known keywords, as
+4. X can require the snippet to be one of the language's known keywords, as
 declared earlier in the ILD by a |keyword| command. The syntax here is
 |keyword of C|, where |C| is a colour. For example:
 = (sample ILDF code)
    keyword of !element => !element
 =
 says: if the snippet is a keyword declared as being of colour |!element|,
-then actually colour it that way.
+then actually colour it that way. (This is much faster than making many
+comparison rules in a row, one for each keyword in the language; Inweb has
+put all of the registered keywords into a hash table for rapid lookup.)

-4. X can look at a little context before or after the snippet, testing it
+5. X can look at a little context before or after the snippet, testing it
 with one of the following: |prefix P|, |spaced prefix P|,
 |optionally spaced prefix P|. These qualifiers have to do with whether white
 space must appear after |P| and before the snippet. For example,
@ -403,12 +532,69 @@ space must appear after |P| and before the snippet. For example,
 means that any identifier occurring after a |->| token will be coloured
 as |!element|. Similarly for |suffix|.

-5. And otherwise X is literal text, and the rule applies if and only if
-the snippet is exactly that text. For example,
+6. X can test the snippet against a regular expression, with |matching /E/|.
+For example:
 = (sample ILDF code)
-    printf => !function
+    runs of !identifier {
+        matching /.*x.*/ => !element
+    }
+=
+...turns any identifier containing a lower-case |x| into |!element| colour.
+Note that |matching /x/| would not have worked, because our regular expression
+is required to match the entire snippet, not just somewhere inside.
+= (sample ILDF code)
+    characters in "0123456789" {
+        matching /\d\d\d\d/ => !element
+    }
+=
+...colours all four-digit numbers, but no others.

-@ Now let's look at the conclusion Y of a rule. Here the possibilities are
+7. Whenever a split takes place, Inweb keeps count of how many pieces there are,
+and different rules can apply to differently numbered pieces. The notation
+is |number N|, where |N| is the number, counting from 1. For example,
+[[../Private Languages/ThirdExample.ildf as ILDF]]
+acts on:
+= (not code)
+With how sad steps, O Moon, thou climb'st the skies! 
+How silently, and with how wan a face! 
+What, may it be that even in heav'nly place 
+That busy archer his sharp arrows tries! 
+Sure, if that long-with love-acquainted eyes 
+Can judge of love, thou feel'st a lover's case, 
+I read it in thy looks; thy languish'd grace 
+To me, that feel the like, thy state descries. 
+Then, ev'n of fellowship, O Moon, tell me, 
+Is constant love deem'd there but want of wit? 
+Are beauties there as proud as here they be? 
+Do they above love to be lov'd, and yet 
+Those lovers scorn whom that love doth possess? 
+Do they call virtue there ungratefulness?
+=
+to produce:
+= (sample ThirdExample code)
+With how sad steps, O Moon, thou climb'st the skies! 
+How silently, and with how wan a face! 
+What, may it be that even in heav'nly place 
+That busy archer his sharp arrows tries! 
+Sure, if that long-with love-acquainted eyes 
+Can judge of love, thou feel'st a lover's case, 
+I read it in thy looks; thy languish'd grace 
+To me, that feel the like, thy state descries. 
+Then, ev'n of fellowship, O Moon, tell me, 
+Is constant love deem'd there but want of wit? 
+Are beauties there as proud as here they be? 
+Do they above love to be lov'd, and yet 
+Those lovers scorn whom that love doth possess? 
+Do they call virtue there ungratefulness?
+=
+
+@ Any condition can be reversed by preceding it with |not|. For example,
+= (sample ILDF code)
+    not coloured !string => !plain
+=
+
+@h The three ways rules can take effect.
+Now let's look at the conclusion Y of a rule. Here the possibilities are
 simpler:

 1. If Y is the name of a colour, the snippet is painted in that colour.
@ -429,17 +615,17 @@ rules (see above), it can also be applied to the prefix or suffix: use
 the notation |=> C on both| or |=> C on suffix| or |=> C on prefix|.

 3. If Y is the word |debug|, then the current snippet and its colouring
-are printed out on the command line.
-
-@ The syntax of ILDs tends to avoid superfluous quotation marks as confusing,
-but sometimes you need to be pedantic. If you want to match the text |=>|,
-for example, that could lead to ambiguity with the rule marker |=>|. For
-such occasions, simply put the text in double quotes, and change any literal
-double quote in it to |\"|, and use |\\| for a literal backslash. For example:
+are printed out on the command line. Thus:
 = (sample ILDF code)
-    "keyword" => !reserved
+    colouring {
+        matches of /\d\S+/ {
+            => debug
+        }
+    }
+=
+The rule |=> debug| is unconditional, and will print whenever it's reached.

-@h Example.
+@h The worm, Ouroboros.
 Inweb Language Definition Format is a kind of language in itself, and in
 fact Inweb is supplied with an ILD for ILDF itself, which Inweb used to
 syntax-colour the examples above. Here it is, as syntax-coloured by itself:
--- a/Languages/AssemblageExample.ildf
+++ b/Languages/AssemblageExample.ildf
@ -0,0 +1,6 @@
+Name: "AssemblageExample"
+colouring {
+	matches of /\.[A-Za-z_][A-Za-z_0-9]*/ {
+		=> !function
+	}
+}
--- a/Languages/EquationsExample.ildf
+++ b/Languages/EquationsExample.ildf
@ -0,0 +1,7 @@
+Name: "EquationsExample"
+colouring {
+	=> !plain
+	brackets in /.*?([A-Z])\s*=\s*(\d+).*/ {
+		=> !function
+	}
+}
--- a/Languages/LineageExample.ildf
+++ b/Languages/LineageExample.ildf
@ -0,0 +1,7 @@
+Name: "LineageExample"
+colouring {
+	=> !plain
+	instances of "son" {
+		=> !function
+	}
+}
--- a/Languages/RunningExample.ildf
+++ b/Languages/RunningExample.ildf
@ -0,0 +1,10 @@
+Name: "RunningExample"
+colouring {
+	=> !plain
+	characters in "0123456789" {
+		=> !function
+	}
+	runs of !plain {
+		"-" => !function
+	}
+}
--- a/Languages/StdioExample.ildf
+++ b/Languages/StdioExample.ildf
@ -0,0 +1,7 @@
+Name: "StdioExample"
+colouring {
+	runs of !identifier {
+		printf => !function
+		sscanf => !function
+	}
+}
--- a/Languages/ThirdExample.ildf
+++ b/Languages/ThirdExample.ildf
@ -0,0 +1,7 @@
+Name: "ThirdExample"
+colouring {
+	=> !plain
+	matches of /\S+/ {
+		number 3 => !function
+	}
+}
--- a/Languages/VowelsExample.ildf
+++ b/Languages/VowelsExample.ildf
@ -0,0 +1,7 @@
+Name: "VowelsExample"
+colouring {
+	=> !plain
+	characters in "AEIOUaeiou" {
+		=> !function
+	}
+}
--- a/Tangled/inweb.c
+++ b/Tangled/inweb.c
--- a/docs/foundation-module/4-pm.html
+++ b/docs/foundation-module/4-pm.html
@ -469,8 +469,9 @@ says <code class="display"><span class="extract">q</span></code>, the only match
 </li><li>(e) <code class="display"><span class="extract">%i</span></code> means any character from the identifier class (see above);
 </li><li>(f) <code class="display"><span class="extract">%p</span></code> means any character which can be used in the name of a Preform
 nonterminal, which is to say, an identifier character or a hyphen;
-</li><li>(g) <code class="display"><span class="extract">%P</span></code> means the same or else a colon.
-</li><li>(h) <code class="display"><span class="extract">%t</span></code> means a tab.
+</li><li>(g) <code class="display"><span class="extract">%P</span></code> means the same or else a colon;
+</li><li>(h) <code class="display"><span class="extract">%t</span></code> means a tab;
+</li><li>(i) <code class="display"><span class="extract">%q</span></code> means a double-quote.
 </li></ul>
 <p class="inwebparagraph"><code class="display"><span class="extract">%</span></code> otherwise makes a literal escape; a space means any whitespace character;
 square brackets enclose literal alternatives, and note as usual with grep
--- a/docs/inweb/4-pl.html
+++ b/docs/inweb/4-pl.html
@ -524,6 +524,7 @@ little context before it (where available).
    <span class="definitionkeyword">define</span> <span class="constant">UNSPACED_RULE_SUFFIX</span><span class="plain"> </span><span class="constant">5</span><span class="plain"> </span><span class="comment">for <code class="display"><span class="extract">suffix P</span></code></span>
    <span class="definitionkeyword">define</span> <span class="constant">SPACED_RULE_SUFFIX</span><span class="plain"> </span><span class="constant">6</span><span class="plain"> </span><span class="comment">for <code class="display"><span class="extract">spaced suffix P</span></code></span>
    <span class="definitionkeyword">define</span> <span class="constant">OPTIONALLY_SPACED_RULE_SUFFIX</span><span class="plain"> </span><span class="constant">7</span><span class="plain"> </span><span class="comment">for <code class="display"><span class="extract">optionally spaced suffix P</span></code></span>
+    <span class="definitionkeyword">define</span> <span class="constant">MAX_ILDF_REGEXP_LENGTH</span><span class="plain"> </span><span class="constant">64</span>
 </pre>

 <pre class="display">
@ -533,6 +534,7 @@ little context before it (where available).
        <span class="reserved">int</span><span class="plain"> </span><span class="identifier">match_keyword_of_colour</span><span class="plain">; </span><span class="comment">for <code class="display"><span class="extract">keyword C</span></code>, or else <code class="display"><span class="extract">NOT_A_COLOUR</span></code></span>
        <span class="reserved">struct</span><span class="plain"> </span><span class="reserved">text_stream</span><span class="plain"> *</span><span class="identifier">match_text</span><span class="plain">; </span><span class="comment">or length 0 to mean "anything"</span>
        <span class="reserved">int</span><span class="plain"> </span><span class="identifier">match_prefix</span><span class="plain">; </span><span class="comment">one of the <code class="display"><span class="extract">*_RULE_PREFIX</span></code> values above</span>
+        <span class="identifier">wchar_t</span><span class="plain"> </span><span class="identifier">match_regexp_text</span><span class="plain">[</span><span class="constant">MAX_ILDF_REGEXP_LENGTH</span><span class="plain">];</span>

        <span class="comment">the conclusion:</span>
        <span class="reserved">struct</span><span class="plain"> </span><span class="reserved">colouring_language_block</span><span class="plain"> *</span><span class="identifier">execute_block</span><span class="plain">; </span><span class="comment">or <code class="display"><span class="extract">NULL</span></code>, in which case...</span>
@ -542,6 +544,7 @@ little context before it (where available).

        <span class="comment">workspace during painting</span>
        <span class="reserved">int</span><span class="plain"> </span><span class="identifier">fix_position</span><span class="plain">; </span><span class="comment">where the prefix or suffix started</span>
+        <span class="reserved">struct</span><span class="plain"> </span><span class="reserved">match_results</span><span class="plain"> </span><span class="identifier">mr</span><span class="plain">; </span><span class="comment">of a regular expression</span>
        <span class="constant">MEMORY_MANAGEMENT</span>
    <span class="plain">} </span><span class="reserved">colouring_rule</span><span class="plain">;</span>
 </pre>
@ -562,11 +565,15 @@ little context before it (where available).
        <span class="identifier">rule</span><span class="plain">-&gt;</span><span class="element">match_text</span><span class="plain"> = </span><span class="identifier">NULL</span><span class="plain">;</span>
        <span class="identifier">rule</span><span class="plain">-&gt;</span><span class="element">match_prefix</span><span class="plain"> = </span><span class="constant">NOT_A_RULE_PREFIX</span><span class="plain">;</span>
        <span class="identifier">rule</span><span class="plain">-&gt;</span><span class="element">match_keyword_of_colour</span><span class="plain"> = </span><span class="constant">NOT_A_COLOUR</span><span class="plain">;</span>
+        <span class="identifier">rule</span><span class="plain">-&gt;</span><span class="element">match_regexp_text</span><span class="plain">[0] = </span><span class="constant">0</span><span class="plain">;</span>

        <span class="identifier">rule</span><span class="plain">-&gt;</span><span class="element">set_to_colour</span><span class="plain"> = </span><span class="constant">NOT_A_COLOUR</span><span class="plain">;</span>
        <span class="identifier">rule</span><span class="plain">-&gt;</span><span class="element">set_prefix_to_colour</span><span class="plain"> = </span><span class="constant">NOT_A_COLOUR</span><span class="plain">;</span>
        <span class="identifier">rule</span><span class="plain">-&gt;</span><span class="element">execute_block</span><span class="plain"> = </span><span class="identifier">NULL</span><span class="plain">;</span>
        <span class="identifier">rule</span><span class="plain">-&gt;</span><span class="element">debug</span><span class="plain"> = </span><span class="constant">FALSE</span><span class="plain">;</span>
+
+        <span class="identifier">rule</span><span class="plain">-&gt;</span><span class="element">fix_position</span><span class="plain"> = </span><span class="constant">0</span><span class="plain">;</span>
+        <span class="identifier">rule</span><span class="plain">-&gt;</span><span class="element">mr</span><span class="plain"> = </span><span class="functiontext">Regexp::create_mr</span><span class="plain">();</span>
        <span class="reserved">return</span><span class="plain"> </span><span class="identifier">rule</span><span class="plain">;</span>
    <span class="plain">}</span>
 </pre>
@ -607,6 +614,8 @@ little context before it (where available).
        <span class="plain">} </span><span class="reserved">else</span><span class="plain"> </span><span class="reserved">if</span><span class="plain"> (</span><span class="functiontext">Regexp::match</span><span class="plain">(&amp;</span><span class="identifier">mr</span><span class="plain">, </span><span class="identifier">premiss</span><span class="plain">, </span><span class="identifier">L</span><span class="string">"prefix (%c+)"</span><span class="plain">)) {</span>
            <span class="identifier">rule</span><span class="plain">-&gt;</span><span class="element">match_prefix</span><span class="plain"> = </span><span class="constant">UNSPACED_RULE_PREFIX</span><span class="plain">;</span>
            <span class="identifier">rule</span><span class="plain">-&gt;</span><span class="element">match_text</span><span class="plain"> = </span><span class="functiontext">Languages::text</span><span class="plain">(</span><span class="identifier">mr</span><span class="plain">.</span><span class="element">exp</span><span class="plain">[0], </span><span class="identifier">tfp</span><span class="plain">, </span><span class="constant">FALSE</span><span class="plain">);</span>
+        <span class="plain">} </span><span class="reserved">else</span><span class="plain"> </span><span class="reserved">if</span><span class="plain"> (</span><span class="functiontext">Regexp::match</span><span class="plain">(&amp;</span><span class="identifier">mr</span><span class="plain">, </span><span class="identifier">premiss</span><span class="plain">, </span><span class="identifier">L</span><span class="string">"match (%c+)"</span><span class="plain">)) {</span>
+            <span class="functiontext">Languages::regexp</span><span class="plain">(</span><span class="identifier">rule</span><span class="plain">-&gt;</span><span class="element">match_regexp_text</span><span class="plain">, </span><span class="identifier">mr</span><span class="plain">.</span><span class="element">exp</span><span class="plain">[0], </span><span class="identifier">tfp</span><span class="plain">);</span>
        <span class="plain">} </span><span class="reserved">else</span><span class="plain"> </span><span class="reserved">if</span><span class="plain"> (</span><span class="functiontext">Regexp::match</span><span class="plain">(&amp;</span><span class="identifier">mr</span><span class="plain">, </span><span class="identifier">premiss</span><span class="plain">, </span><span class="identifier">L</span><span class="string">"spaced prefix (%c+)"</span><span class="plain">)) {</span>
            <span class="identifier">rule</span><span class="plain">-&gt;</span><span class="element">match_prefix</span><span class="plain"> = </span><span class="constant">SPACED_RULE_PREFIX</span><span class="plain">;</span>
            <span class="identifier">rule</span><span class="plain">-&gt;</span><span class="element">match_text</span><span class="plain"> = </span><span class="functiontext">Languages::text</span><span class="plain">(</span><span class="identifier">mr</span><span class="plain">.</span><span class="element">exp</span><span class="plain">[0], </span><span class="identifier">tfp</span><span class="plain">, </span><span class="constant">FALSE</span><span class="plain">);</span>
@ -823,6 +832,81 @@ literal backslash.

 <p class="endnote">The function Languages::text is used in <a href="#SP7_1">&#167;7.1</a>, <a href="#SP7_2">&#167;7.2</a>, <a href="#SP12_1">&#167;12.1</a>.</p>

+<p class="inwebparagraph"><a id="SP17"></a><b>&#167;17.  </b>And regular expressions.
+</p>
+
+
+<pre class="display">
+    <span class="reserved">void</span><span class="plain"> </span><span class="functiontext">Languages::regexp</span><span class="plain">(</span><span class="identifier">wchar_t</span><span class="plain"> *</span><span class="identifier">write_to</span><span class="plain">, </span><span class="reserved">text_stream</span><span class="plain"> *</span><span class="identifier">T</span><span class="plain">, </span><span class="reserved">text_file_position</span><span class="plain"> *</span><span class="identifier">tfp</span><span class="plain">) {</span>
+        <span class="reserved">if</span><span class="plain"> (</span><span class="identifier">write_to</span><span class="plain"> == </span><span class="identifier">NULL</span><span class="plain">) </span><span class="identifier">internal_error</span><span class="plain">(</span><span class="string">"no buffer"</span><span class="plain">);</span>
+        <span class="identifier">write_to</span><span class="plain">[0] = </span><span class="constant">0</span><span class="plain">;</span>
+        <span class="reserved">if</span><span class="plain"> (</span><span class="functiontext">Str::len</span><span class="plain">(</span><span class="identifier">T</span><span class="plain">) &gt; </span><span class="constant">0</span><span class="plain">) {</span>
+            <span class="reserved">int</span><span class="plain"> </span><span class="identifier">from</span><span class="plain"> = </span><span class="constant">0</span><span class="plain">, </span><span class="identifier">to</span><span class="plain"> = </span><span class="functiontext">Str::len</span><span class="plain">(</span><span class="identifier">T</span><span class="plain">)-1, </span><span class="identifier">x</span><span class="plain"> = </span><span class="constant">0</span><span class="plain">;</span>
+            <span class="reserved">if</span><span class="plain"> ((</span><span class="identifier">to</span><span class="plain"> &gt; </span><span class="identifier">from</span><span class="plain">) &amp;&amp;</span>
+                <span class="plain">(</span><span class="functiontext">Str::get_at</span><span class="plain">(</span><span class="identifier">T</span><span class="plain">, </span><span class="identifier">from</span><span class="plain">) == </span><span class="character">'/'</span><span class="plain">) &amp;&amp; (</span><span class="functiontext">Str::get_at</span><span class="plain">(</span><span class="identifier">T</span><span class="plain">, </span><span class="identifier">to</span><span class="plain">) == </span><span class="character">'/'</span><span class="plain">)) {</span>
+                <span class="identifier">from</span><span class="plain">++; </span><span class="identifier">to</span><span class="plain">--;</span>
+                <span class="reserved">for</span><span class="plain"> (</span><span class="reserved">int</span><span class="plain"> </span><span class="identifier">i</span><span class="plain">=</span><span class="identifier">from</span><span class="plain">; </span><span class="identifier">i</span><span class="plain">&lt;=</span><span class="identifier">to</span><span class="plain">; </span><span class="identifier">i</span><span class="plain">++) {</span>
+                    <span class="identifier">wchar_t</span><span class="plain"> </span><span class="identifier">c</span><span class="plain"> = </span><span class="functiontext">Str::get_at</span><span class="plain">(</span><span class="identifier">T</span><span class="plain">, </span><span class="identifier">i</span><span class="plain">);</span>
+                    <span class="reserved">if</span><span class="plain"> (</span><span class="identifier">c</span><span class="plain"> == </span><span class="character">'\\'</span><span class="plain">) {</span>
+                        <span class="identifier">wchar_t</span><span class="plain"> </span><span class="identifier">w</span><span class="plain"> = </span><span class="functiontext">Str::get_at</span><span class="plain">(</span><span class="identifier">T</span><span class="plain">, </span><span class="identifier">i</span><span class="plain">+1);</span>
+                        <span class="reserved">if</span><span class="plain"> (</span><span class="identifier">w</span><span class="plain"> == </span><span class="character">'\\'</span><span class="plain">) {</span>
+                            <span class="identifier">x</span><span class="plain"> = </span><span class="functiontext">Languages::add_to_regexp</span><span class="plain">(</span><span class="identifier">write_to</span><span class="plain">, </span><span class="identifier">x</span><span class="plain">, </span><span class="identifier">w</span><span class="plain">);</span>
+                        <span class="plain">} </span><span class="reserved">else</span><span class="plain"> </span><span class="reserved">if</span><span class="plain"> (</span><span class="identifier">w</span><span class="plain"> == </span><span class="character">'d'</span><span class="plain">) {</span>
+                            <span class="identifier">x</span><span class="plain"> = </span><span class="functiontext">Languages::add_escape_to_regexp</span><span class="plain">(</span><span class="identifier">write_to</span><span class="plain">, </span><span class="identifier">x</span><span class="plain">, </span><span class="character">'d'</span><span class="plain">);</span>
+                        <span class="plain">} </span><span class="reserved">else</span><span class="plain"> </span><span class="reserved">if</span><span class="plain"> (</span><span class="identifier">w</span><span class="plain"> == </span><span class="character">'t'</span><span class="plain">) {</span>
+                            <span class="identifier">x</span><span class="plain"> = </span><span class="functiontext">Languages::add_escape_to_regexp</span><span class="plain">(</span><span class="identifier">write_to</span><span class="plain">, </span><span class="identifier">x</span><span class="plain">, </span><span class="character">'t'</span><span class="plain">);</span>
+                        <span class="plain">} </span><span class="reserved">else</span><span class="plain"> </span><span class="reserved">if</span><span class="plain"> (</span><span class="identifier">w</span><span class="plain"> == </span><span class="character">'s'</span><span class="plain">) {</span>
+                            <span class="identifier">x</span><span class="plain"> = </span><span class="functiontext">Languages::add_to_regexp</span><span class="plain">(</span><span class="identifier">write_to</span><span class="plain">, </span><span class="identifier">x</span><span class="plain">, </span><span class="character">' '</span><span class="plain">);</span>
+                        <span class="plain">} </span><span class="reserved">else</span><span class="plain"> </span><span class="reserved">if</span><span class="plain"> (</span><span class="identifier">w</span><span class="plain"> == </span><span class="character">'S'</span><span class="plain">) {</span>
+                            <span class="identifier">x</span><span class="plain"> = </span><span class="functiontext">Languages::add_escape_to_regexp</span><span class="plain">(</span><span class="identifier">write_to</span><span class="plain">, </span><span class="identifier">x</span><span class="plain">, </span><span class="character">'C'</span><span class="plain">);</span>
+                        <span class="plain">} </span><span class="reserved">else</span><span class="plain"> </span><span class="reserved">if</span><span class="plain"> (</span><span class="identifier">w</span><span class="plain"> == </span><span class="character">'"'</span><span class="plain">) {</span>
+                            <span class="identifier">x</span><span class="plain"> = </span><span class="functiontext">Languages::add_escape_to_regexp</span><span class="plain">(</span><span class="identifier">write_to</span><span class="plain">, </span><span class="identifier">x</span><span class="plain">, </span><span class="character">'q'</span><span class="plain">);</span>
+                        <span class="plain">} </span><span class="reserved">else</span><span class="plain"> {</span>
+                            <span class="identifier">x</span><span class="plain"> = </span><span class="functiontext">Languages::add_escape_to_regexp</span><span class="plain">(</span><span class="identifier">write_to</span><span class="plain">, </span><span class="identifier">x</span><span class="plain">, </span><span class="identifier">w</span><span class="plain">);</span>
+                        <span class="plain">}</span>
+                        <span class="identifier">i</span><span class="plain">++;</span>
+                        <span class="reserved">continue</span><span class="plain">;</span>
+                    <span class="plain">}</span>
+                    <span class="reserved">if</span><span class="plain"> (</span><span class="identifier">c</span><span class="plain"> == </span><span class="character">'.'</span><span class="plain">) {</span>
+                        <span class="identifier">x</span><span class="plain"> = </span><span class="functiontext">Languages::add_escape_to_regexp</span><span class="plain">(</span><span class="identifier">write_to</span><span class="plain">, </span><span class="identifier">x</span><span class="plain">, </span><span class="character">'c'</span><span class="plain">);</span>
+                        <span class="reserved">continue</span><span class="plain">;</span>
+                    <span class="plain">}</span>
+                    <span class="reserved">if</span><span class="plain"> (</span><span class="identifier">c</span><span class="plain"> == </span><span class="character">'%'</span><span class="plain">) {</span>
+                        <span class="identifier">x</span><span class="plain"> = </span><span class="functiontext">Languages::add_escape_to_regexp</span><span class="plain">(</span><span class="identifier">write_to</span><span class="plain">, </span><span class="identifier">x</span><span class="plain">, </span><span class="character">'%'</span><span class="plain">);</span>
+                        <span class="reserved">continue</span><span class="plain">;</span>
+                    <span class="plain">}</span>
+                    <span class="identifier">x</span><span class="plain"> = </span><span class="functiontext">Languages::add_to_regexp</span><span class="plain">(</span><span class="identifier">write_to</span><span class="plain">, </span><span class="identifier">x</span><span class="plain">, </span><span class="identifier">c</span><span class="plain">);</span>
+                <span class="plain">}</span>
+            <span class="plain">} </span><span class="reserved">else</span><span class="plain"> {</span>
+                <span class="functiontext">Errors::in_text_file</span><span class="plain">(</span>
+                    <span class="string">"the expression to match must be in slashes '/'"</span><span class="plain">, </span><span class="identifier">tfp</span><span class="plain">);</span>
+            <span class="plain">}</span>
+            <span class="reserved">if</span><span class="plain"> (</span><span class="identifier">x</span><span class="plain"> &gt;= </span><span class="constant">MAX_ILDF_REGEXP_LENGTH</span><span class="plain">)</span>
+                <span class="functiontext">Errors::in_text_file</span><span class="plain">(</span>
+                    <span class="string">"the expression to match is too long"</span><span class="plain">, </span><span class="identifier">tfp</span><span class="plain">);</span>
+        <span class="plain">}</span>
+    <span class="plain">}</span>
+
+    <span class="reserved">int</span><span class="plain"> </span><span class="functiontext">Languages::add_to_regexp</span><span class="plain">(</span><span class="identifier">wchar_t</span><span class="plain"> *</span><span class="identifier">write_to</span><span class="plain">, </span><span class="reserved">int</span><span class="plain"> </span><span class="identifier">i</span><span class="plain">, </span><span class="identifier">wchar_t</span><span class="plain"> </span><span class="identifier">c</span><span class="plain">) {</span>
+        <span class="reserved">if</span><span class="plain"> (</span><span class="identifier">i</span><span class="plain"> &lt; </span><span class="constant">MAX_ILDF_REGEXP_LENGTH</span><span class="plain">) </span><span class="identifier">write_to</span><span class="plain">[</span><span class="identifier">i</span><span class="plain">++] = </span><span class="identifier">c</span><span class="plain">;</span>
+        <span class="reserved">return</span><span class="plain"> </span><span class="identifier">i</span><span class="plain">;</span>
+    <span class="plain">}</span>
+
+    <span class="reserved">int</span><span class="plain"> </span><span class="functiontext">Languages::add_escape_to_regexp</span><span class="plain">(</span><span class="identifier">wchar_t</span><span class="plain"> *</span><span class="identifier">write_to</span><span class="plain">, </span><span class="reserved">int</span><span class="plain"> </span><span class="identifier">i</span><span class="plain">, </span><span class="identifier">wchar_t</span><span class="plain"> </span><span class="identifier">c</span><span class="plain">) {</span>
+        <span class="identifier">i</span><span class="plain"> = </span><span class="functiontext">Languages::add_to_regexp</span><span class="plain">(</span><span class="identifier">write_to</span><span class="plain">, </span><span class="identifier">i</span><span class="plain">, </span><span class="character">'%'</span><span class="plain">);</span>
+        <span class="identifier">i</span><span class="plain"> = </span><span class="functiontext">Languages::add_to_regexp</span><span class="plain">(</span><span class="identifier">write_to</span><span class="plain">, </span><span class="identifier">i</span><span class="plain">, </span><span class="identifier">c</span><span class="plain">);</span>
+        <span class="reserved">return</span><span class="plain"> </span><span class="identifier">i</span><span class="plain">;</span>
+    <span class="plain">}</span>
+</pre>
+
+<p class="inwebparagraph"></p>
+
+<p class="endnote">The function Languages::regexp is used in <a href="#SP12_1">&#167;12.1</a>.</p>
+
+<p class="endnote">The function Languages::add_to_regexp appears nowhere else.</p>
+
+<p class="endnote">The function Languages::add_escape_to_regexp appears nowhere else.</p>
+
 <hr class="tocbar">
 <ul class="toc"><li><i>(This section begins Chapter 4: Languages.)</i></li><li><a href="4-lm.html">Continue with 'Language Methods'</a></li></ul><hr class="tocbar">
 <!--End of weave-->
--- a/docs/inweb/4-tp.html
+++ b/docs/inweb/4-tp.html
@ -378,7 +378,10 @@ rule across the whole snippet before moving on to the next.
 <pre class="display">
    <span class="reserved">int</span><span class="plain"> </span><span class="functiontext">Painter::satisfies</span><span class="plain">(</span><span class="reserved">hash_table</span><span class="plain"> *</span><span class="identifier">HT</span><span class="plain">, </span><span class="reserved">colouring_rule</span><span class="plain"> *</span><span class="identifier">rule</span><span class="plain">, </span><span class="reserved">text_stream</span><span class="plain"> *</span><span class="identifier">matter</span><span class="plain">,</span>
        <span class="reserved">text_stream</span><span class="plain"> *</span><span class="identifier">colouring</span><span class="plain">, </span><span class="reserved">int</span><span class="plain"> </span><span class="identifier">from</span><span class="plain">, </span><span class="reserved">int</span><span class="plain"> </span><span class="identifier">to</span><span class="plain">) {</span>
-        <span class="reserved">if</span><span class="plain"> (</span><span class="functiontext">Str::len</span><span class="plain">(</span><span class="identifier">rule</span><span class="plain">-&gt;</span><span class="element">match_text</span><span class="plain">) &gt; </span><span class="constant">0</span><span class="plain">) {</span>
+        <span class="reserved">if</span><span class="plain"> (</span><span class="identifier">rule</span><span class="plain">-&gt;</span><span class="identifier">match_regexp_text</span><span class="plain">[0]) {</span>
+            <span class="reserved">if</span><span class="plain"> (</span><span class="functiontext">Regexp::match</span><span class="plain">(&amp;(</span><span class="identifier">rule</span><span class="plain">-&gt;</span><span class="element">mr</span><span class="plain">), </span><span class="identifier">matter</span><span class="plain">, </span><span class="identifier">rule</span><span class="plain">-&gt;</span><span class="element">match_regexp_text</span><span class="plain">) == </span><span class="constant">FALSE</span><span class="plain">)</span>
+                <span class="reserved">return</span><span class="plain"> </span><span class="constant">FALSE</span><span class="plain">;</span>
+        <span class="plain">} </span><span class="reserved">else</span><span class="plain"> </span><span class="reserved">if</span><span class="plain"> (</span><span class="functiontext">Str::len</span><span class="plain">(</span><span class="identifier">rule</span><span class="plain">-&gt;</span><span class="element">match_text</span><span class="plain">) &gt; </span><span class="constant">0</span><span class="plain">) {</span>
            <span class="reserved">if</span><span class="plain"> ((</span><span class="identifier">rule</span><span class="plain">-&gt;</span><span class="identifier">match_prefix</span><span class="plain"> == </span><span class="constant">UNSPACED_RULE_PREFIX</span><span class="plain">) ||</span>
                <span class="plain">(</span><span class="identifier">rule</span><span class="plain">-&gt;</span><span class="element">match_prefix</span><span class="plain"> == </span><span class="constant">SPACED_RULE_PREFIX</span><span class="plain">) ||</span>
                <span class="plain">(</span><span class="identifier">rule</span><span class="plain">-&gt;</span><span class="element">match_prefix</span><span class="plain"> == </span><span class="constant">OPTIONALLY_SPACED_RULE_PREFIX</span><span class="plain">)) {</span>
--- a/docs/inweb/M-spl.html
+++ b/docs/inweb/M-spl.html
@ -1,7 +1,7 @@
 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
 <html>
 	<head>
-		<title>M/awwp</title>
+		<title>Booklet Title</title>
 		<meta name="viewport" content="width=device-width initial-scale=1">
 		<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
 		<meta http-equiv="Content-Language" content="en-gb">
@ -26,7 +26,7 @@
 <!--Weave of 'M/spl' generated by 7-->
 <ul class="crumbs"><li><a href="../webs.html">Source</a></li><li><a href="index.html">inweb</a></li><li><a href="index.html#M">Manual</a></li><li><b>Supporting Programming Languages</b></li></ul><p class="purpose">How to work with a programming language not yet supported by Inweb.</p>

-<ul class="toc"><li><a href="#SP1">&#167;1. Introduction</a></li><li><a href="#SP4">&#167;4. Structure of language definitions</a></li><li><a href="#SP5">&#167;5. Properties</a></li><li><a href="#SP16">&#167;16. Secret Features</a></li><li><a href="#SP17">&#167;17. Keywords</a></li><li><a href="#SP18">&#167;18. Syntax colouring program</a></li><li><a href="#SP26">&#167;26. Example</a></li></ul><hr class="tocbar">
+<ul class="toc"><li><a href="#SP1">&#167;1. Introduction</a></li><li><a href="#SP4">&#167;4. Structure of language definitions</a></li><li><a href="#SP5">&#167;5. Properties</a></li><li><a href="#SP16">&#167;16. Secret Features</a></li><li><a href="#SP17">&#167;17. Keywords</a></li><li><a href="#SP18">&#167;18. Syntax colouring program</a></li><li><a href="#SP22">&#167;22. The six splits</a></li><li><a href="#SP23">&#167;23. The seven ways rules can apply</a></li><li><a href="#SP25">&#167;25. The three ways rules can take effect</a></li><li><a href="#SP26">&#167;26. The worm, Ouroboros</a></li></ul><hr class="tocbar">

 <p class="inwebparagraph"><a id="SP1"></a><b>&#167;1. Introduction. </b>To a very large extent, Inweb works the same way regardless of what language
 its webs are using, and that is deliberate. On the other hand, when a web
@ -96,7 +96,9 @@ This section of the manual is about how to do it.

 <p class="inwebparagraph">Once you have written a definition, use <code class="display"><span class="extract">-read-language L</span></code> at the command
 line, where <code class="display"><span class="extract">L</span></code> is the file defining it. If you have many custom languages,
-<code class="display"><span class="extract">-read-languages D</span></code> reads all of the definitions in a directory <code class="display"><span class="extract">D</span></code>.
+<code class="display"><span class="extract">-read-languages D</span></code> reads all of the definitions in a directory <code class="display"><span class="extract">D</span></code>. Or, if
+the language in question is really quite specific to a single web, you can
+make a <code class="display"><span class="extract">Private Languages</span></code> subdirectory of the web and put it in there.
 </p>

 <p class="inwebparagraph"><a id="SP4"></a><b>&#167;4. Structure of language definitions. </b>Each language is defined by a single ILDF file. ("Inweb Language Definition
@ -111,7 +113,7 @@ so are comments, which are lines beginning with a <code class="display"><span cl
 <p class="inwebparagraph">The ILD contains three sorts of thing:
 </p>

-<ul class="items"><li>(a) Properties, set by lines in the form <code class="display"><span class="extract">Name: C++</span></code>.
+<ul class="items"><li>(a) Properties, set by lines in the form <code class="display"><span class="extract">Name: "C++"</span></code>.
 </li><li>(b) Keywords, set by lines in the form <code class="display"><span class="extract">keyword int</span></code>.
 </li><li>(c) A colouring program, introduced by <code class="display"><span class="extract">colouring {</span></code> and continuing until the
 last block of it is closed with a <code class="display"><span class="extract">}</span></code>.
@ -122,9 +124,9 @@ practice, though, every ILD should open like so:


 <pre class="display">
-    <span class="identifier">Name</span><span class="plain">: </span><span class="identifier">C</span>
-    <span class="identifier">Details</span><span class="plain">: </span><span class="identifier">The</span><span class="plain"> </span><span class="identifier">C</span><span class="plain"> </span><span class="identifier">programming</span><span class="plain"> </span><span class="identifier">language</span>
-    <span class="identifier">Extension</span><span class="plain">: .</span><span class="identifier">c</span>
+    <span class="plain">Name: </span><span class="string">"C"</span>
+    <span class="plain">Details: </span><span class="string">"The C programming language"</span>
+    <span class="plain">Extension: </span><span class="string">".c"</span>
 </pre>

 <p class="inwebparagraph"></p>
@ -133,7 +135,7 @@ practice, though, every ILD should open like so:
 with the semi-compulsory ones.
 </p>

-<p class="inwebparagraph"><code class="display"><span class="extract">Name</span></code>. This is the one used by webs in their <code class="display"><span class="extract">Language: X</span></code> lines, and should
+<p class="inwebparagraph"><code class="display"><span class="extract">Name</span></code>. This is the one used by webs in their <code class="display"><span class="extract">Language: "X"</span></code> lines, and should
 match the ILD's own filename: wherever it is stored, the ILD for language <code class="display"><span class="extract">X</span></code>
 should be filenamed <code class="display"><span class="extract">X.ildf</span></code>.
 </p>
@ -169,9 +171,9 @@ as a pair or not at all, is the notation for multiline comments.


 <pre class="display">
-        <span class="identifier">Multiline</span><span class="plain"> </span><span class="identifier">Comment</span><span class="plain"> </span><span class="identifier">Open</span><span class="plain">: /*</span>
-        <span class="identifier">Multiline</span><span class="plain"> </span><span class="identifier">Comment</span><span class="plain"> </span><span class="identifier">Close</span><span class="plain">: */</span>
-        <span class="identifier">Line</span><span class="plain"> </span><span class="identifier">Comment</span><span class="plain">: //</span>
+        <span class="plain">Multiline Comment Open: </span><span class="string">"/*"</span>
+        <span class="plain">Multiline Comment Close: </span><span class="string">"*/"</span>
+        <span class="plain">Line Comment: </span><span class="string">"//"</span>
 </pre>

 <p class="inwebparagraph"></p>
@ -196,10 +198,10 @@ character literals.


 <pre class="display">
-        <span class="identifier">String</span><span class="plain"> </span><span class="identifier">Literal</span><span class="plain">: </span><span class="string">"\""</span>
-        <span class="identifier">String</span><span class="plain"> </span><span class="identifier">Literal</span><span class="plain"> </span><span class="identifier">Escape</span><span class="plain">: \</span>
-        <span class="identifier">Character</span><span class="plain"> </span><span class="identifier">Literal</span><span class="plain">: '</span>
-        <span class="identifier">Character</span><span class="plain"> </span><span class="identifier">Literal</span><span class="plain"> </span><span class="identifier">Escape</span><span class="plain">: \</span>
+        <span class="plain">String Literal: </span><span class="string">"\""</span>
+        <span class="plain">String Literal Escape: </span><span class="string">"\\"</span>
+        <span class="plain">Character Literal: </span><span class="string">"'"</span>
+        <span class="plain">Character Literal Escape: </span><span class="string">"\\"</span>
 </pre>

 <p class="inwebparagraph"></p>
@ -220,9 +222,9 @@ are notations for non-decimal numbers, if they exist.


 <pre class="display">
-        <span class="identifier">Hexadecimal</span><span class="plain"> </span><span class="identifier">Literal</span><span class="plain"> </span><span class="identifier">Prefix</span><span class="plain">: </span><span class="constant">0</span><span class="identifier">x</span>
-        <span class="identifier">Binary</span><span class="plain"> </span><span class="identifier">Literal</span><span class="plain"> </span><span class="identifier">Prefix</span><span class="plain">: </span><span class="constant">0</span><span class="identifier">b</span>
-        <span class="identifier">Negative</span><span class="plain"> </span><span class="identifier">Literal</span><span class="plain"> </span><span class="identifier">Prefix</span><span class="plain">: -</span>
+        <span class="plain">Hexadecimal Literal Prefix: </span><span class="string">"0x"</span>
+        <span class="plain">Binary Literal Prefix: </span><span class="string">"0b"</span>
+        <span class="plain">Negative Literal Prefix: </span><span class="string">"-"</span>
 </pre>

 <p class="inwebparagraph"></p>
@ -235,7 +237,7 @@ For example, Perl defines:


 <pre class="display">
-        <span class="identifier">Shebang</span><span class="plain">: #!/</span><span class="identifier">usr</span><span class="plain">/</span><span class="identifier">bin</span><span class="plain">/</span><span class="identifier">perl</span><span class="plain">\</span><span class="identifier">n</span><span class="plain">\</span><span class="identifier">n</span>
+        <span class="plain">Shebang: </span><span class="string">"#!/usr/bin/perl\n\n"</span>
 </pre>

 <p class="inwebparagraph">Most languages do not have a shebang.
@ -255,7 +257,7 @@ that this language does, and gives the notation. For example, C provides:


 <pre class="display">
-        <span class="identifier">Line</span><span class="plain"> </span><span class="identifier">Marker</span><span class="plain">: </span><span class="string">"#line %d \"%f\"\n"</span>
+        <span class="plain">Line Marker: </span><span class="string">"#line %d \"%f\"\n"</span>
 </pre>

 <p class="inwebparagraph">Here <code class="display"><span class="extract">%d</span></code> expands to the line number, and <code class="display"><span class="extract">%f</span></code> the filename, of origin.
@ -272,8 +274,8 @@ matter added. This material is in <code class="display"><span class="extract">Be


 <pre class="display">
-        <span class="identifier">Before</span><span class="plain"> </span><span class="identifier">Named</span><span class="plain"> </span><span class="identifier">Paragraph</span><span class="plain"> </span><span class="identifier">Expansion</span><span class="plain">: \</span><span class="identifier">n</span><span class="reserved">{</span><span class="plain">\</span><span class="identifier">n</span>
-        <span class="identifier">After</span><span class="plain"> </span><span class="identifier">Named</span><span class="plain"> </span><span class="identifier">Paragraph</span><span class="plain"> </span><span class="identifier">Expansion</span><span class="plain">: </span><span class="reserved">}</span><span class="plain">\</span><span class="identifier">n</span>
+        <span class="plain">Before Named Paragraph Expansion: </span><span class="string">"\n{\n"</span>
+        <span class="plain">After Named Paragraph Expansion: </span><span class="string">"}\n"</span>
 </pre>

 <p class="inwebparagraph">The effect of this is to ensure that code such as:
@ -317,8 +319,8 @@ if given, places any ending notation. For example, Inform 6 defines:


 <pre class="display">
-        <span class="identifier">Start</span><span class="plain"> </span><span class="identifier">Definition</span><span class="plain">: </span><span class="identifier">Constant</span><span class="plain"> %</span><span class="identifier">S</span><span class="plain"> =\</span><span class="identifier">s</span>
-        <span class="identifier">End</span><span class="plain"> </span><span class="identifier">Definition</span><span class="plain">: ;\</span><span class="identifier">n</span>
+        <span class="plain">Start Definition: </span><span class="string">"Constant %S =\s"</span>
+        <span class="plain">End Definition: </span><span class="string">";\n"</span>
 </pre>

 <p class="inwebparagraph">where <code class="display"><span class="extract">%S</span></code> expands to the name of the term to be defined. Thus, we might tangle
@ -327,7 +329,7 @@ out to:


 <pre class="display">
-        <span class="identifier">Constant</span><span class="plain"> </span><span class="identifier">TAXICAB</span><span class="plain"> = </span><span class="constant">1729</span><span class="plain">;\</span><span class="identifier">n</span>
+        <span class="plain">Constant TAXICAB = 1729;\n</span>
 </pre>

 <p class="inwebparagraph">Inweb ignores all definitions unless one of these three properties is given.
@ -342,10 +344,10 @@ and in <code class="display"><span class="extract">Start Ifndef</span></code> an


 <pre class="display">
-        <span class="identifier">Start</span><span class="plain"> </span><span class="identifier">Ifdef</span><span class="plain">: #</span><span class="identifier">ifdef</span><span class="plain"> %</span><span class="identifier">S</span><span class="plain">;\</span><span class="identifier">n</span>
-        <span class="identifier">End</span><span class="plain"> </span><span class="identifier">Ifdef</span><span class="plain">: #</span><span class="identifier">endif</span><span class="plain">; ! %</span><span class="identifier">S</span><span class="plain">\</span><span class="identifier">n</span>
-        <span class="identifier">Start</span><span class="plain"> </span><span class="identifier">Ifndef</span><span class="plain">: #</span><span class="identifier">ifndef</span><span class="plain"> %</span><span class="identifier">S</span><span class="plain">;\</span><span class="identifier">n</span>
-        <span class="identifier">End</span><span class="plain"> </span><span class="identifier">Ifndef</span><span class="plain">: #</span><span class="identifier">endif</span><span class="plain">; ! %</span><span class="identifier">S</span><span class="plain">\</span><span class="identifier">n</span>
+        <span class="plain">Start Ifdef: </span><span class="string">"#ifdef %S;\n"</span>
+        <span class="plain">End Ifdef: </span><span class="string">"#endif; ! %S\n"</span>
+        <span class="plain">Start Ifndef: </span><span class="string">"#ifndef %S;\n"</span>
+        <span class="plain">End Ifndef: </span><span class="string">"#endif; ! %S\n"</span>
 </pre>

 <p class="inwebparagraph">which is a subtly different notation from the C one. Again, <code class="display"><span class="extract">%S</span></code> expands to
@ -393,7 +395,7 @@ in the language in question. For C, then, we include the line:


 <pre class="display">
-        <span class="identifier">keyword</span><span class="plain"> </span><span class="identifier">void</span>
+        <span class="reserved">keyword</span><span class="plain"> </span><span class="string">void</span>
 </pre>

 <p class="inwebparagraph">Keywords can be declared in a number of categories, which are identified by
@ -403,7 +405,7 @@ for example:


 <pre class="display">
-        <span class="identifier">keyword</span><span class="plain"> </span><span class="identifier">isdigit</span><span class="plain"> </span><span class="identifier">of</span><span class="plain"> !</span><span class="element">function</span>
+        <span class="reserved">keyword</span><span class="plain"> </span><span class="string">isdigit</span><span class="plain"> </span><span class="reserved">of</span><span class="plain"> </span><span class="element">!function</span>
 </pre>

 <p class="inwebparagraph">makes a keyword of colour <code class="display"><span class="extract">!function</span></code>.
@ -420,8 +422,8 @@ palette of possibilities:


 <pre class="display">
-    <span class="plain">!</span><span class="element">character</span><span class="plain">  !</span><span class="element">comment</span><span class="plain">     !</span><span class="element">constant</span><span class="plain">  !</span><span class="element">definition</span><span class="plain">  !</span><span class="element">element</span><span class="plain">  !</span><span class="element">extract</span>
-    <span class="plain">!</span><span class="element">function</span><span class="plain">   !</span><span class="element">identifier</span><span class="plain">  !</span><span class="element">plain</span><span class="plain">     !</span><span class="element">reserved</span><span class="plain">    !</span><span class="element">string</span>
+    <span class="element">!character</span><span class="plain">  </span><span class="element">!comment</span><span class="plain">     </span><span class="element">!constant</span><span class="plain">  </span><span class="element">!definition</span><span class="plain">  </span><span class="element">!element</span><span class="plain">  </span><span class="element">!extract</span>
+    <span class="element">!function</span><span class="plain">   </span><span class="element">!identifier</span><span class="plain">  </span><span class="element">!plain</span><span class="plain">     </span><span class="element">!reserved</span><span class="plain">    </span><span class="element">!string</span>
 </pre>

 <p class="inwebparagraph">Each character has its own colour. At the start of the process, every
@ -454,7 +456,7 @@ empty program is legal but does nothing:


 <pre class="display">
-        <span class="identifier">colouring</span><span class="plain"> </span><span class="reserved">{</span>
+        <span class="reserved">colouring</span><span class="plain"> </span><span class="reserved">{</span>
        <span class="reserved">}</span>
 </pre>

@ -466,68 +468,225 @@ block, that's a line of source code. Blocks normally contain one or more


 <pre class="display">
-        <span class="identifier">colouring</span><span class="plain"> </span><span class="reserved">{</span>
-            <span class="identifier">marble</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> !</span><span class="element">extract</span>
+        <span class="reserved">colouring</span><span class="plain"> </span><span class="reserved">{</span>
+            <span class="string">marble</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!function</span>
        <span class="reserved">}</span>
 </pre>

 <p class="inwebparagraph">Rules take the form of "if X, then Y", and the <code class="display"><span class="extract">=&gt;</span></code> divides the X from the Y.
 This one says that if the snippet consists of the word "marble", then colour
-it <code class="display"><span class="extract">!extract</span></code>. Of course this is not very useful, since it would only catch
+it <code class="display"><span class="extract">!function</span></code>. Of course this is not very useful, since it would only catch
 lines containing only that one word. So we really want to narrow in on smaller
-snippets:
+snippets. This, for example, applies its rule to each individual character
+in turn:
 </p>


 <pre class="display">
-        <span class="identifier">colouring</span><span class="plain"> </span><span class="reserved">{</span>
-            <span class="identifier">characters</span><span class="plain"> </span><span class="reserved">{</span>
-                <span class="identifier">X</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> !</span><span class="element">extract</span>
+        <span class="reserved">colouring</span><span class="plain"> </span><span class="reserved">{</span>
+            <span class="reserved">characters</span><span class="plain"> </span><span class="reserved">{</span>
+                <span class="string">K</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!identifier</span>
            <span class="reserved">}</span>
        <span class="reserved">}</span>
 </pre>

-<p class="inwebparagraph">The effect of the <code class="display"><span class="extract">characters {</span></code> ... <code class="display"><span class="extract">}</span></code> block is to apply all its rules to
-each character of the snippet owning it. Inside the block, then, the snippet
-is always just a single character, and our rule tells us to paint the letter X
-wherever it occurs.
+<p class="inwebparagraph"></p>
+
+<p class="inwebparagraph"><a id="SP21"></a><b>&#167;21.  </b>In the above examples, <code class="display"><span class="extract">K</span></code> and <code class="display"><span class="extract">marble</span></code> appeared without quotation marks,
+but they were only allowed to do that because (a) they were single words,
 </p>

-<p class="inwebparagraph"><a id="SP21"></a><b>&#167;21.  </b>The block <code class="display"><span class="extract">instances of X</span></code> narrows in on each usage of the text <code class="display"><span class="extract">X</span></code> inside
+<p class="inwebparagraph">(b) those words had no other meaning, and (c) they didn't contain any
+awkward characters. For any more complicated texts, always use quotation
+marks. For example, in
+</p>
+
+<pre class="display">
+        <span class="string">"=&gt;"</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!reserved</span>
+</pre>
+
+<p class="inwebparagraph">the <code class="display"><span class="extract">=&gt;</span></code> in quotes is just text, whereas the one outside quotes is being
+used to divide a rule.
+</p>
+
+<p class="inwebparagraph">If you need a literal double quote inside the double-quotes, use <code class="display"><span class="extract">\"</span></code>; and
+use <code class="display"><span class="extract">\\</span></code> for a literal backslash. For example:
+</p>
+
+
+<pre class="display">
+        <span class="string">"\\\""</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!reserved</span>
+</pre>
+
+<p class="inwebparagraph">actually matches the text <code class="display"><span class="extract">\"</span></code>.
+</p>
+
+<p class="inwebparagraph"><a id="SP22"></a><b>&#167;22. The six splits. </b><code class="display"><span class="extract">characters</span></code> is an example of a "split", which splits up the original snippet
+of text &mdash; say, the line <code class="display"><span class="extract">let K = 2</span></code> &mdash; into smaller, non-overlapping snippets
+&mdash; in this case, nine of them: <code class="display"><span class="extract">l</span></code>, <code class="display"><span class="extract">e</span></code>, <code class="display"><span class="extract">t</span></code>, <code class="display"><span class="extract"> </span></code>, <code class="display"><span class="extract">K</span></code>, <code class="display"><span class="extract"> </span></code>, <code class="display"><span class="extract">=</span></code>, <code class="display"><span class="extract"> </span></code>, and <code class="display"><span class="extract">2</span></code>.
+Every split is followed by a block of rules, which is applied to each of the
+pieces in turn. Inweb works sideways-first: thus, if the block contains rules
+R1, R2, ..., then R1 is applied to each piece first, then R2 to each piece,
+and so on.
+</p>
+
+<p class="inwebparagraph">There are several different ways to split, all of them written in the
+plural, to emphasize that they work on what are usually multiple things.
+Rules, on the other hand, are written in the singular. Splits are not allowed
+to be followed by <code class="display"><span class="extract">=&gt;</span></code>: they always begin a block.
+</p>
+
+<p class="inwebparagraph">1. <code class="display"><span class="extract">characters</span></code> splits the snippet into each of its characters.
+</p>
+
+<p class="inwebparagraph">2. <code class="display"><span class="extract">characters in T</span></code> splits the snippet into each of its characters which
+lie inside the text <code class="display"><span class="extract">T</span></code>. For example, here is a not very useful ILD for
+plain text in which all vowels are in red:
+</p>
+
+<pre class="display">
+    <span class="plain">Name: </span><span class="string">"VowelsExample"</span>
+    <span class="reserved">colouring</span><span class="plain"> </span><span class="reserved">{</span>
+    <span class="plain">	</span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!plain</span>
+    <span class="plain">	</span><span class="reserved">characters</span><span class="plain"> </span><span class="reserved">in</span><span class="plain"> </span><span class="string">"AEIOUaeiou"</span><span class="plain"> </span><span class="reserved">{</span>
+    <span class="plain">		</span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!function</span>
+    <span class="plain">	</span><span class="reserved">}</span>
+    <span class="reserved">}</span>
+
+</pre>
+
+<p class="inwebparagraph">Given the text:
+</p>
+
+
+<pre class="display">
+    <span class="plain">A noir, E blanc, I rouge, U vert, O bleu : voyelles,</span>
+    <span class="plain">Je dirai quelque jour vos naissances latentes :</span>
+    <span class="plain">A, noir corset velu des mouches éclatantes</span>
+    <span class="plain">Qui bombinent autour des puanteurs cruelles,</span>
+</pre>
+
+<p class="inwebparagraph">this produces:
+</p>
+
+
+<pre class="display">
+    <span class="functiontext">A</span><span class="plain"> n</span><span class="functiontext">oi</span><span class="plain">r, </span><span class="functiontext">E</span><span class="plain"> bl</span><span class="functiontext">a</span><span class="plain">nc, </span><span class="functiontext">I</span><span class="plain"> r</span><span class="functiontext">ou</span><span class="plain">g</span><span class="functiontext">e</span><span class="plain">, </span><span class="functiontext">U</span><span class="plain"> v</span><span class="functiontext">e</span><span class="plain">rt, </span><span class="functiontext">O</span><span class="plain"> bl</span><span class="functiontext">eu</span><span class="plain"> : v</span><span class="functiontext">o</span><span class="plain">y</span><span class="functiontext">e</span><span class="plain">ll</span><span class="functiontext">e</span><span class="plain">s,</span>
+    <span class="plain">J</span><span class="functiontext">e</span><span class="plain"> d</span><span class="functiontext">i</span><span class="plain">r</span><span class="functiontext">ai</span><span class="plain"> q</span><span class="functiontext">ue</span><span class="plain">lq</span><span class="functiontext">ue</span><span class="plain"> j</span><span class="functiontext">ou</span><span class="plain">r v</span><span class="functiontext">o</span><span class="plain">s n</span><span class="functiontext">ai</span><span class="plain">ss</span><span class="functiontext">a</span><span class="plain">nc</span><span class="functiontext">e</span><span class="plain">s l</span><span class="functiontext">a</span><span class="plain">t</span><span class="functiontext">e</span><span class="plain">nt</span><span class="functiontext">e</span><span class="plain">s :</span>
+    <span class="functiontext">A</span><span class="plain">, n</span><span class="functiontext">oi</span><span class="plain">r c</span><span class="functiontext">o</span><span class="plain">rs</span><span class="functiontext">e</span><span class="plain">t v</span><span class="functiontext">e</span><span class="plain">l</span><span class="functiontext">u</span><span class="plain"> d</span><span class="functiontext">e</span><span class="plain">s m</span><span class="functiontext">ou</span><span class="plain">ch</span><span class="functiontext">e</span><span class="plain">s écl</span><span class="functiontext">a</span><span class="plain">t</span><span class="functiontext">a</span><span class="plain">nt</span><span class="functiontext">e</span><span class="plain">s</span>
+    <span class="plain">Q</span><span class="functiontext">ui</span><span class="plain"> b</span><span class="functiontext">o</span><span class="plain">mb</span><span class="functiontext">i</span><span class="plain">n</span><span class="functiontext">e</span><span class="plain">nt </span><span class="functiontext">au</span><span class="plain">t</span><span class="functiontext">ou</span><span class="plain">r d</span><span class="functiontext">e</span><span class="plain">s p</span><span class="functiontext">ua</span><span class="plain">nt</span><span class="functiontext">eu</span><span class="plain">rs cr</span><span class="functiontext">ue</span><span class="plain">ll</span><span class="functiontext">e</span><span class="plain">s,</span>
+</pre>
+
+<p class="inwebparagraph">3. The split <code class="display"><span class="extract">instances of X</span></code> narrows in on each usage of the text <code class="display"><span class="extract">X</span></code> inside
 the snippet. For example,
 </p>

-
 <pre class="display">
-        <span class="identifier">colouring</span><span class="plain"> </span><span class="reserved">{</span>
-            <span class="identifier">instances</span><span class="plain"> </span><span class="identifier">of</span><span class="plain"> == </span><span class="reserved">{</span>
-                <span class="reserved">=&gt;</span><span class="plain"> !</span><span class="element">reserved</span>
-            <span class="reserved">}</span>
-        <span class="reserved">}</span>
+    <span class="plain">Name: </span><span class="string">"LineageExample"</span>
+    <span class="reserved">colouring</span><span class="plain"> </span><span class="reserved">{</span>
+    <span class="plain">	</span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!plain</span>
+    <span class="plain">	</span><span class="reserved">instances</span><span class="plain"> </span><span class="reserved">of</span><span class="plain"> </span><span class="string">"son"</span><span class="plain"> </span><span class="reserved">{</span>
+    <span class="plain">		</span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!function</span>
+    <span class="plain">	</span><span class="reserved">}</span>
+    <span class="reserved">}</span>
+
 </pre>

-<p class="inwebparagraph">gives every usage of <code class="display"><span class="extract">==</span></code> the colour <code class="display"><span class="extract">!reserved</span></code>. Note that it never runs in
-an overlapping way: the snippet <code class="display"><span class="extract">===</span></code> would be considered as having only one
-instance of <code class="display"><span class="extract">==</span></code> (the first two characters), while <code class="display"><span class="extract">====</span></code> would have two.
-</p>
-
-<p class="inwebparagraph"><a id="SP22"></a><b>&#167;22.  </b>Another kind of block is <code class="display"><span class="extract">runs of C</span></code>, where <code class="display"><span class="extract">C</span></code> is a colour. For example:
+<p class="inwebparagraph">acts on the text:
 </p>


 <pre class="display">
-        <span class="identifier">colouring</span><span class="plain"> </span><span class="reserved">{</span>
-            <span class="identifier">runs</span><span class="plain"> </span><span class="identifier">of</span><span class="plain"> !</span><span class="element">identifier</span><span class="plain"> </span><span class="reserved">{</span>
-                <span class="identifier">printf</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> !</span><span class="element">function</span>
-                <span class="identifier">sscanf</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> !</span><span class="element">function</span>
-            <span class="reserved">}</span>
-        <span class="reserved">}</span>
+    <span class="plain">Jacob first appears in the Book of Genesis, the son of Isaac and Rebecca, the</span>
+    <span class="plain">grandson of Abraham, Sarah and Bethuel, the nephew of Ishmael.</span>
 </pre>

-<p class="inwebparagraph">If this runs on the line <code class="display"><span class="extract">if (x == 1) printf("Hello!");</span></code>, then the inner
-block will run three times: its snippet will be <code class="display"><span class="extract">if</span></code>, then <code class="display"><span class="extract">x</span></code>, then <code class="display"><span class="extract">printf</span></code>.
-The rules inside the block will take effect only on the third time, when it
-will paint the word <code class="display"><span class="extract">printf</span></code> in <code class="display"><span class="extract">!function</span></code> colour.
+<p class="inwebparagraph">to produce:
+</p>
+
+
+<pre class="display">
+    <span class="plain">Jacob first appears in the Book of Genesis, the </span><span class="functiontext">son</span><span class="plain"> of Isaac and Rebecca, the</span>
+    <span class="plain">grand</span><span class="functiontext">son</span><span class="plain"> of Abraham, Sarah and Bethuel, the nephew of Ishmael.</span>
+</pre>
+
+<p class="inwebparagraph">Note that it never runs in an overlapping way: when looking for instances of <code class="display"><span class="extract">==</span></code>, the snippet <code class="display"><span class="extract">===</span></code> would be
+considered as having only one instance (the first two characters),
+while <code class="display"><span class="extract">====</span></code> would have two.
+</p>
+
+<p class="inwebparagraph">4. The split <code class="display"><span class="extract">runs of C</span></code>, where <code class="display"><span class="extract">C</span></code> describes a colour, splits the snippet
+into non-overlapping contiguous pieces which have that colour. For example:
+</p>
+
+<pre class="display">
+    <span class="plain">Name: </span><span class="string">"RunningExample"</span>
+    <span class="reserved">colouring</span><span class="plain"> </span><span class="reserved">{</span>
+    <span class="plain">	</span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!plain</span>
+    <span class="plain">	</span><span class="reserved">characters</span><span class="plain"> </span><span class="reserved">in</span><span class="plain"> </span><span class="string">"0123456789"</span><span class="plain"> </span><span class="reserved">{</span>
+    <span class="plain">		</span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!function</span>
+    <span class="plain">	</span><span class="reserved">}</span>
+    <span class="plain">	</span><span class="reserved">runs</span><span class="plain"> </span><span class="reserved">of</span><span class="plain"> </span><span class="element">!plain</span><span class="plain"> </span><span class="reserved">{</span>
+    <span class="plain">		</span><span class="string">"-"</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!function</span>
+    <span class="plain">	</span><span class="reserved">}</span>
+    <span class="reserved">}</span>
+
+</pre>
+
+<p class="inwebparagraph">acts on:
+</p>
+
+
+<pre class="display">
+    <span class="plain">Napoleon Bonaparte (1769-1821) took 167 scientists to Egypt in 1798,</span>
+    <span class="plain">who published their so-called Memoirs over the period 1798-1801.</span>
+</pre>
+
+<p class="inwebparagraph">to produce:
+</p>
+
+
+<pre class="display">
+    <span class="plain">Napoleon Bonaparte (</span><span class="functiontext">1769-1821</span><span class="plain">) took </span><span class="functiontext">167</span><span class="plain"> scientists to Egypt in </span><span class="functiontext">1798</span><span class="plain">,</span>
+    <span class="plain">who published their so-called Memoirs over the period </span><span class="functiontext">1798-1801</span><span class="plain">.</span>
+</pre>
+
+<p class="inwebparagraph">Here the hyphens in number ranges have been coloured, but not the hyphen
+in "so-called".
+</p>
+
+<p class="inwebparagraph">A more computer-science sort of example would be:
+</p>
+
+<pre class="display">
+    <span class="plain">Name: </span><span class="string">"StdioExample"</span>
+    <span class="reserved">colouring</span><span class="plain"> </span><span class="reserved">{</span>
+    <span class="plain">	</span><span class="reserved">runs</span><span class="plain"> </span><span class="reserved">of</span><span class="plain"> </span><span class="element">!identifier</span><span class="plain"> </span><span class="reserved">{</span>
+    <span class="plain">		</span><span class="string">printf</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!function</span>
+    <span class="plain">		</span><span class="string">sscanf</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!function</span>
+    <span class="plain">	</span><span class="reserved">}</span>
+    <span class="reserved">}</span>
+
+</pre>
+
+<p class="inwebparagraph">which acts on:
+</p>
+
+
+<pre class="display">
+    <span class="plain">if (x == 1) printf("Hello!");</span>
+</pre>
+
+<p class="inwebparagraph">to produce:
+</p>
+
+
+<pre class="display">
+    <span class="identifier">if</span><span class="plain"> (</span><span class="identifier">x</span><span class="plain"> == </span><span class="constant">1</span><span class="plain">) </span><span class="functiontext">printf</span><span class="plain">("</span><span class="identifier">Hello</span><span class="plain">!");</span>
+</pre>
+
+<p class="inwebparagraph">The split divides the line up into three runs, and the inner block runs three
+times: on <code class="display"><span class="extract">if</span></code>, then <code class="display"><span class="extract">x</span></code>, then <code class="display"><span class="extract">printf</span></code>. Only the third time has any effect.
 </p>

 <p class="inwebparagraph">As a special form, <code class="display"><span class="extract">runs of unquoted</span></code> means "runs of characters not painted
@ -535,30 +694,124 @@ either with <code class="display"><span class="extract">!string</span></code> or
 not a colour.
 </p>

-<p class="inwebparagraph"><a id="SP23"></a><b>&#167;23.  </b>It remains to specify what rules can do. As noted, they take the form
-"if X, then Y". The following are the possibilities for X, the condition:
+<p class="inwebparagraph">5. The split <code class="display"><span class="extract">matches of /E/</span></code>, where <code class="display"><span class="extract">/E/</span></code> is a regular expression (see below),
+splits the snippet up into non-overlapping pieces which match it: possibly
+none at all, of course, in which case the block of rules is never used.
+This is easier to demonstrate than explain:
 </p>

-<p class="inwebparagraph">1. X can be omitted altogether, and then the rule always applies. For example,
-this somewhat nihilistic program gets rid of colouring entirely:
+<pre class="display">
+    <span class="plain">Name: </span><span class="string">"AssemblageExample"</span>
+    <span class="reserved">colouring</span><span class="plain"> </span><span class="reserved">{</span>
+    <span class="plain">	</span><span class="reserved">matches</span><span class="plain"> </span><span class="reserved">of</span><span class="plain"> </span><span class="functiontext">/\.[A-Za-z_][A-Za-z_0-9]*/</span><span class="plain"> </span><span class="reserved">{</span>
+    <span class="plain">		</span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!function</span>
+    <span class="plain">	</span><span class="reserved">}</span>
+    <span class="reserved">}</span>
+
+</pre>
+
+<p class="inwebparagraph">which acts on:
 </p>


 <pre class="display">
-        <span class="identifier">colouring</span><span class="plain"> </span><span class="reserved">{</span>
-            <span class="reserved">=&gt;</span><span class="plain"> !</span><span class="element">plain</span>
+            <span class="plain">JSR .initialise</span>
+            <span class="plain">LDR A, #.data</span>
+            <span class="plain">RTS</span>
+        <span class="plain">.initialise</span>
+            <span class="plain">TAX</span>
+</pre>
+
+<p class="inwebparagraph">to produce:
+</p>
+
+
+<pre class="display">
+            <span class="identifier">JSR</span><span class="plain"> </span><span class="functiontext">.initialise</span>
+            <span class="identifier">LDR</span><span class="plain"> </span><span class="identifier">A</span><span class="plain">, #</span><span class="functiontext">.data</span>
+            <span class="identifier">RTS</span>
+        <span class="functiontext">.initialise</span>
+            <span class="identifier">TAX</span>
+</pre>
+
+<p class="inwebparagraph">6. Lastly, the split <code class="display"><span class="extract">brackets in /E/</span></code> matches the snippet against the
+regular expression <code class="display"><span class="extract">E</span></code>, and then runs the rules on each bracketed
+subexpression in turn. (If there is no match, or there are no bracketed
+terms in <code class="display"><span class="extract">E</span></code>, nothing happens.)
+</p>
+
+<pre class="display">
+    <span class="plain">Name: </span><span class="string">"EquationsExample"</span>
+    <span class="reserved">colouring</span><span class="plain"> </span><span class="reserved">{</span>
+    <span class="plain">	</span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!plain</span>
+    <span class="plain">	</span><span class="reserved">brackets</span><span class="plain"> </span><span class="reserved">in</span><span class="plain"> </span><span class="functiontext">/.*?([A-Z])\s*=\s*(\d+).*/</span><span class="plain"> </span><span class="reserved">{</span>
+    <span class="plain">		</span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!function</span>
+    <span class="plain">	</span><span class="reserved">}</span>
+    <span class="reserved">}</span>
+
+</pre>
+
+<p class="inwebparagraph">acts on:
+</p>
+
+
+<pre class="display">
+        <span class="plain">A = 2716</span>
+        <span class="plain">B=3</span>
+        <span class="plain">C =715 + B</span>
+        <span class="plain">D &lt; 14</span>
+</pre>
+
+<p class="inwebparagraph">to produce:
+</p>
+
+
+<pre class="display">
+        <span class="functiontext">A</span><span class="plain"> = </span><span class="functiontext">2716</span>
+        <span class="functiontext">B</span><span class="plain">=</span><span class="functiontext">3</span>
+        <span class="functiontext">C</span><span class="plain"> =</span><span class="functiontext">715</span><span class="plain"> + B</span>
+        <span class="plain">D &lt; 14</span>
+</pre>
+
+<p class="inwebparagraph">What happens here is that the expression has two bracketed terms, one for
+the letter, one for the number; the rule is run first on the letter, then
+on the number, and both are turned to <code class="display"><span class="extract">!function</span></code>.
+</p>
+
+<p class="inwebparagraph"><a id="SP23"></a><b>&#167;23. The seven ways rules can apply. </b>Rules are the lines containing a <code class="display"><span class="extract">=&gt;</span></code>. As noted, they take the form "if X, then
+Y". The following are the possibilities for X, the condition.
+</p>
+
+<p class="inwebparagraph">1. The easiest thing is to give nothing at all, and then the rule always
+applies. For example, this somewhat nihilistic program gets rid of colouring
+entirely:
+</p>
+
+
+<pre class="display">
+        <span class="reserved">colouring</span><span class="plain"> </span><span class="reserved">{</span>
+            <span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!plain</span>
        <span class="reserved">}</span>
 </pre>

-<p class="inwebparagraph">2. X can require the whole snippet to be of a particular colour, by writing
-<code class="display"><span class="extract">colour C</span></code>. For example:
+<p class="inwebparagraph">2. If X is a piece of literal text, the rule applies when the snippet is
+exactly that text. For example,
 </p>


 <pre class="display">
-        <span class="identifier">colouring</span><span class="plain"> </span><span class="reserved">{</span>
-            <span class="identifier">characters</span><span class="plain"> </span><span class="reserved">{</span>
-                <span class="identifier">colour</span><span class="plain"> !</span><span class="element">character</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> !</span><span class="element">plain</span>
+        <span class="string">printf</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!function</span>
+</pre>
+
+<p class="inwebparagraph">3. X can require the whole snippet to be of a particular colour, by writing
+<code class="display"><span class="extract">coloured C</span></code>. For example:
+</p>
+
+
+<pre class="display">
+        <span class="reserved">colouring</span><span class="plain"> </span><span class="reserved">{</span>
+            <span class="reserved">characters</span><span class="plain"> </span><span class="reserved">{</span>
+                <span class="reserved">coloured</span><span class="plain"> </span><span class="element">!character</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!plain</span>
            <span class="reserved">}</span>
        <span class="reserved">}</span>
 </pre>
@ -566,21 +819,23 @@ this somewhat nihilistic program gets rid of colouring entirely:
 <p class="inwebparagraph">removes the syntax colouring on character literals.
 </p>

-<p class="inwebparagraph">3. X can require the snippet to be one of the language's known keywords, as
+<p class="inwebparagraph">4. X can require the snippet to be one of the language's known keywords, as
 declared earlier in the ILD by a <code class="display"><span class="extract">keyword</span></code> command. The syntax here is
 <code class="display"><span class="extract">keyword of C</span></code>, where <code class="display"><span class="extract">C</span></code> is a colour. For example:
 </p>


 <pre class="display">
-        <span class="identifier">keyword</span><span class="plain"> </span><span class="identifier">of</span><span class="plain"> !</span><span class="element">element</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> !</span><span class="element">element</span>
+        <span class="reserved">keyword</span><span class="plain"> </span><span class="reserved">of</span><span class="plain"> </span><span class="element">!element</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!element</span>
 </pre>

 <p class="inwebparagraph">says: if the snippet is a keyword declared as being of colour <code class="display"><span class="extract">!element</span></code>,
-then actually colour it that way.
+then actually colour it that way. (This is much faster than making many
+comparison rules in a row, one for each keyword in the language; Inweb has
+put all of the registered keywords into a hash table for rapid lookup.)
 </p>

-<p class="inwebparagraph">4. X can look at a little context before or after the snippet, testing it
+<p class="inwebparagraph">5. X can look at a little context before or after the snippet, testing it
 with one of the following: <code class="display"><span class="extract">prefix P</span></code>, <code class="display"><span class="extract">spaced prefix P</span></code>,
 <code class="display"><span class="extract">optionally spaced prefix P</span></code>. These qualifiers have to do with whether white
 space must appear after <code class="display"><span class="extract">P</span></code> and before the snippet. For example,
@ -588,8 +843,8 @@ space must appear after <code class="display"><span class="extract">P</span></co


 <pre class="display">
-        <span class="identifier">runs</span><span class="plain"> </span><span class="identifier">of</span><span class="plain"> !</span><span class="element">identifier</span><span class="plain"> </span><span class="reserved">{</span>
-            <span class="identifier">prefix</span><span class="plain"> </span><span class="identifier">optionally</span><span class="plain"> </span><span class="identifier">spaced</span><span class="plain"> -&gt; </span><span class="reserved">=&gt;</span><span class="plain"> !</span><span class="element">element</span>
+        <span class="reserved">runs</span><span class="plain"> </span><span class="reserved">of</span><span class="plain"> </span><span class="element">!identifier</span><span class="plain"> </span><span class="reserved">{</span>
+            <span class="reserved">optionally</span><span class="plain"> </span><span class="reserved">spaced</span><span class="plain"> </span><span class="reserved">prefix</span><span class="plain"> -&gt; </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!element</span>
        <span class="reserved">}</span>
 </pre>

@ -597,18 +852,103 @@ space must appear after <code class="display"><span class="extract">P</span></co
 as <code class="display"><span class="extract">!element</span></code>. Similarly for <code class="display"><span class="extract">suffix</span></code>.
 </p>

-<p class="inwebparagraph">5. And otherwise X is literal text, and the rule applies if and only if
-the snippet is exactly that text. For example,
+<p class="inwebparagraph">6. X can test the snippet against a regular expression, with <code class="display"><span class="extract">matching /E/</span></code>.
+For example:
 </p>


 <pre class="display">
-        <span class="identifier">printf</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> !</span><span class="element">function</span>
+        <span class="reserved">runs</span><span class="plain"> </span><span class="reserved">of</span><span class="plain"> </span><span class="element">!identifier</span><span class="plain"> </span><span class="reserved">{</span>
+            <span class="reserved">matching</span><span class="plain"> </span><span class="functiontext">/.*x.*/</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!element</span>
+        <span class="reserved">}</span>
+</pre>
+
+<p class="inwebparagraph">...turns any identifier containing a lower-case <code class="display"><span class="extract">x</span></code> into <code class="display"><span class="extract">!element</span></code> colour.
+Note that <code class="display"><span class="extract">matching /x/</span></code> would not have worked, because our regular expression
+is required to match the entire snippet, not just some part of it.
+</p>
+
+
+<pre class="display">
+        <span class="reserved">characters</span><span class="plain"> </span><span class="reserved">in</span><span class="plain"> </span><span class="string">"0123456789"</span><span class="plain"> </span><span class="reserved">{</span>
+            <span class="reserved">matching</span><span class="plain"> </span><span class="functiontext">/\d\d\d\d/</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!element</span>
+        <span class="reserved">}</span>
+</pre>
+
+<p class="inwebparagraph">...colours all four-digit numbers, but no others.
+</p>
+
+<p class="inwebparagraph">7. Whenever a split takes place, Inweb keeps count of how many pieces there are,
+and different rules can apply to differently numbered pieces. The notation
+is <code class="display"><span class="extract">number N</span></code>, where <code class="display"><span class="extract">N</span></code> is the number, counting from 1. For example,
+</p>
+
+<pre class="display">
+    <span class="plain">Name: </span><span class="string">"ThirdExample"</span>
+    <span class="reserved">colouring</span><span class="plain"> </span><span class="reserved">{</span>
+    <span class="plain">	</span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!plain</span>
+    <span class="plain">	</span><span class="reserved">matches</span><span class="plain"> </span><span class="reserved">of</span><span class="plain"> </span><span class="functiontext">/\S+/</span><span class="plain"> </span><span class="reserved">{</span>
+    <span class="plain">		</span><span class="string">number</span><span class="plain"> </span><span class="string">3</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!function</span>
+    <span class="plain">	</span><span class="reserved">}</span>
+    <span class="reserved">}</span>
+
+</pre>
+
+<p class="inwebparagraph">acts on:
+</p>
+
+
+<pre class="display">
+    <span class="plain">With how sad steps, O Moon, thou climb'st the skies!</span>
+    <span class="plain">How silently, and with how wan a face!</span>
+    <span class="plain">What, may it be that even in heav'nly place</span>
+    <span class="plain">That busy archer his sharp arrows tries!</span>
+    <span class="plain">Sure, if that long-with love-acquainted eyes</span>
+    <span class="plain">Can judge of love, thou feel'st a lover's case,</span>
+    <span class="plain">I read it in thy looks; thy languish'd grace</span>
+    <span class="plain">To me, that feel the like, thy state descries.</span>
+    <span class="plain">Then, ev'n of fellowship, O Moon, tell me,</span>
+    <span class="plain">Is constant love deem'd there but want of wit?</span>
+    <span class="plain">Are beauties there as proud as here they be?</span>
+    <span class="plain">Do they above love to be lov'd, and yet</span>
+    <span class="plain">Those lovers scorn whom that love doth possess?</span>
+    <span class="plain">Do they call virtue there ungratefulness?</span>
+</pre>
+
+<p class="inwebparagraph">to produce:
+</p>
+
+
+<pre class="display">
+    <span class="plain">With how </span><span class="functiontext">sad</span><span class="plain"> steps, O Moon, thou climb'st the skies!</span>
+    <span class="plain">How silently, </span><span class="functiontext">and</span><span class="plain"> with how wan a face!</span>
+    <span class="plain">What, may </span><span class="functiontext">it</span><span class="plain"> be that even in heav'nly place</span>
+    <span class="plain">That busy </span><span class="functiontext">archer</span><span class="plain"> his sharp arrows tries!</span>
+    <span class="plain">Sure, if </span><span class="functiontext">that</span><span class="plain"> long-with love-acquainted eyes</span>
+    <span class="plain">Can judge </span><span class="functiontext">of</span><span class="plain"> love, thou feel'st a lover's case,</span>
+    <span class="plain">I read </span><span class="functiontext">it</span><span class="plain"> in thy looks; thy languish'd grace</span>
+    <span class="plain">To me, </span><span class="functiontext">that</span><span class="plain"> feel the like, thy state descries.</span>
+    <span class="plain">Then, ev'n </span><span class="functiontext">of</span><span class="plain"> fellowship, O Moon, tell me,</span>
+    <span class="plain">Is constant </span><span class="functiontext">love</span><span class="plain"> deem'd there but want of wit?</span>
+    <span class="plain">Are beauties </span><span class="functiontext">there</span><span class="plain"> as proud as here they be?</span>
+    <span class="plain">Do they </span><span class="functiontext">above</span><span class="plain"> love to be lov'd, and yet</span>
+    <span class="plain">Those lovers </span><span class="functiontext">scorn</span><span class="plain"> whom that love doth possess?</span>
+    <span class="plain">Do they </span><span class="functiontext">call</span><span class="plain"> virtue there ungratefulness?</span>
 </pre>

 <p class="inwebparagraph"></p>

-<p class="inwebparagraph"><a id="SP24"></a><b>&#167;24.  </b>Now let's look at the conclusion Y of a rule. Here the possibilities are
+<p class="inwebparagraph"><a id="SP24"></a><b>&#167;24.  </b>Any condition can be reversed by preceding it with <code class="display"><span class="extract">not</span></code>. For example,
+</p>
+
+
+<pre class="display">
+        <span class="reserved">not</span><span class="plain"> </span><span class="reserved">coloured</span><span class="plain"> </span><span class="element">!string</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!plain</span>
+</pre>
+
+<p class="inwebparagraph"></p>
+
+<p class="inwebparagraph"><a id="SP25"></a><b>&#167;25. The three ways rules can take effect. </b>Now let's look at the conclusion Y of a rule. Here the possibilities are
 simpler:
 </p>

@ -621,9 +961,9 @@ applied to the snippet only if this rule has matched. For example,


 <pre class="display">
-        <span class="identifier">keyword</span><span class="plain"> !</span><span class="element">element</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="reserved">{</span>
-            <span class="identifier">optionally</span><span class="plain"> </span><span class="identifier">spaced</span><span class="plain"> </span><span class="identifier">prefix</span><span class="plain"> . </span><span class="reserved">=&gt;</span><span class="plain"> !</span><span class="element">element</span>
-            <span class="identifier">optionally</span><span class="plain"> </span><span class="identifier">spaced</span><span class="plain"> </span><span class="identifier">prefix</span><span class="plain"> -&gt; </span><span class="reserved">=&gt;</span><span class="plain"> !</span><span class="element">element</span>
+        <span class="reserved">keyword</span><span class="plain"> </span><span class="reserved">of</span><span class="plain"> </span><span class="element">!element</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="reserved">{</span>
+            <span class="reserved">optionally</span><span class="plain"> </span><span class="reserved">spaced</span><span class="plain"> </span><span class="reserved">prefix</span><span class="plain"> . </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!element</span>
+            <span class="reserved">optionally</span><span class="plain"> </span><span class="reserved">spaced</span><span class="plain"> </span><span class="reserved">prefix</span><span class="plain"> -&gt; </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!element</span>
        <span class="reserved">}</span>
 </pre>

@ -637,55 +977,92 @@ the notation <code class="display"><span class="extract">=&gt; C on both</span><
 </p>

 <p class="inwebparagraph">3. If Y is the word <code class="display"><span class="extract">debug</span></code>, then the current snippet and its colouring
-are printed out on the command line.
-</p>
-
-<p class="inwebparagraph"><a id="SP25"></a><b>&#167;25.  </b>The syntax of ILDs tends to avoid superfluous quotation marks as confusing,
-but sometimes you need to be pedantic. If you want to match the text <code class="display"><span class="extract">=&gt;</span></code>,
-for example, that could lead to ambiguity with the rule marker <code class="display"><span class="extract">=&gt;</span></code>. For
-such occasions, simply put the text in double quotes, and change any literal
-double quote in it to <code class="display"><span class="extract">\"</span></code>, and use <code class="display"><span class="extract">\\</span></code> for a literal backslash. For example:
+are printed out on the command line. Thus:
 </p>


 <pre class="display">
-        <span class="string">"keyword"</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> !</span><span class="element">reserved</span>
+        <span class="reserved">colouring</span><span class="plain"> </span><span class="reserved">{</span>
+            <span class="reserved">matches</span><span class="plain"> </span><span class="reserved">of</span><span class="plain"> </span><span class="functiontext">/\d\S+/</span><span class="plain"> </span><span class="reserved">{</span>
+                <span class="reserved">=&gt;</span><span class="plain"> </span><span class="reserved">debug</span>
+            <span class="reserved">}</span>
+        <span class="reserved">}</span>
 </pre>

-<p class="inwebparagraph"></p>
+<p class="inwebparagraph">The rule <code class="display"><span class="extract">=&gt; debug</span></code> is unconditional, and will print whenever it's reached.
+</p>

-<p class="inwebparagraph"><a id="SP26"></a><b>&#167;26. Example. </b>Inweb Language Definition Format is a kind of language in itself, and in
+<p class="inwebparagraph"><a id="SP26"></a><b>&#167;26. The worm, Ouroboros. </b>Inweb Language Definition Format is a kind of language in itself, and in
 fact Inweb is supplied with an ILD for ILDF itself, which Inweb used to
 syntax-colour the examples above. Here it is, as syntax-coloured by itself:
 </p>

 <pre class="display">
-    <span class="identifier">Name</span><span class="plain">: </span><span class="identifier">ILDF</span>
-    <span class="identifier">Details</span><span class="plain">: </span><span class="identifier">The</span><span class="plain"> </span><span class="identifier">Inweb</span><span class="plain"> </span><span class="identifier">Language</span><span class="plain"> </span><span class="identifier">Definition</span><span class="plain"> </span><span class="identifier">File</span><span class="plain"> </span><span class="identifier">format</span>
-    <span class="identifier">Extension</span><span class="plain">: .</span><span class="identifier">ildf</span>
-    <span class="identifier">Whole</span><span class="plain"> </span><span class="identifier">Line</span><span class="plain"> </span><span class="identifier">Comment</span><span class="plain">: #</span>
+    <span class="plain">Name: </span><span class="string">"ILDF"</span>
+    <span class="plain">Details: </span><span class="string">"The Inweb Language Definition File format"</span>
+    <span class="plain">Extension: </span><span class="string">".ildf"</span>
+    <span class="plain">Whole Line Comment: </span><span class="string">"#"</span>
+    <span class="plain">Supports Namespaces: </span><span class="reserved">false</span>

-    <span class="identifier">String</span><span class="plain"> </span><span class="identifier">Literal</span><span class="plain">: </span><span class="string">"\""</span>
-    <span class="identifier">String</span><span class="plain"> </span><span class="identifier">Literal</span><span class="plain"> </span><span class="identifier">Escape</span><span class="plain">: \</span>
+    <span class="plain">String Literal: </span><span class="string">"\""</span>
+    <span class="plain">String Literal Escape: </span><span class="string">"\\"</span>

-    <span class="identifier">keyword</span><span class="plain"> </span><span class="element">unquoted</span><span class="plain"> </span><span class="identifier">of</span><span class="plain"> !</span><span class="element">element</span>
+    <span class="plain">#</span><span class="comment"> Regular expressions are handled here as if character literals</span>
+    <span class="plain">Character Literal: </span><span class="string">"/"</span>
+    <span class="plain">Character Literal Escape: </span><span class="string">"\\"</span>

-    <span class="identifier">colouring</span><span class="plain"> </span><span class="reserved">{</span>
-    <span class="plain">	</span><span class="identifier">runs</span><span class="plain"> </span><span class="identifier">of</span><span class="plain"> !</span><span class="element">identifier</span><span class="plain"> </span><span class="reserved">{</span>
-    <span class="plain">		</span><span class="identifier">prefix</span><span class="plain"> ! </span><span class="reserved">=&gt;</span><span class="plain"> !</span><span class="element">element</span>
-    <span class="plain">		</span><span class="identifier">keyword</span><span class="plain"> </span><span class="identifier">of</span><span class="plain"> !</span><span class="element">element</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> !</span><span class="element">element</span>
+    <span class="reserved">keyword</span><span class="plain"> </span><span class="string">"both"</span>
+    <span class="reserved">keyword</span><span class="plain"> </span><span class="string">"brackets"</span>
+    <span class="reserved">keyword</span><span class="plain"> </span><span class="string">"characters"</span>
+    <span class="reserved">keyword</span><span class="plain"> </span><span class="string">"coloured"</span>
+    <span class="reserved">keyword</span><span class="plain"> </span><span class="string">"colouring"</span>
+    <span class="reserved">keyword</span><span class="plain"> </span><span class="string">"debug"</span>
+    <span class="reserved">keyword</span><span class="plain"> </span><span class="string">"false"</span>
+    <span class="reserved">keyword</span><span class="plain"> </span><span class="string">"in"</span>
+    <span class="reserved">keyword</span><span class="plain"> </span><span class="string">"instances"</span>
+    <span class="reserved">keyword</span><span class="plain"> </span><span class="string">"keyword"</span>
+    <span class="reserved">keyword</span><span class="plain"> </span><span class="string">"matches"</span>
+    <span class="reserved">keyword</span><span class="plain"> </span><span class="string">"matching"</span>
+    <span class="reserved">keyword</span><span class="plain"> </span><span class="string">"not"</span>
+    <span class="reserved">keyword</span><span class="plain"> </span><span class="string">"of"</span>
+    <span class="reserved">keyword</span><span class="plain"> </span><span class="string">"on"</span>
+    <span class="reserved">keyword</span><span class="plain"> </span><span class="string">"optionally"</span>
+    <span class="reserved">keyword</span><span class="plain"> </span><span class="string">"prefix"</span>
+    <span class="reserved">keyword</span><span class="plain"> </span><span class="string">"runs"</span>
+    <span class="reserved">keyword</span><span class="plain"> </span><span class="string">"spaced"</span>
+    <span class="reserved">keyword</span><span class="plain"> </span><span class="string">"suffix"</span>
+    <span class="reserved">keyword</span><span class="plain"> </span><span class="string">"true"</span>
+    <span class="reserved">keyword</span><span class="plain"> </span><span class="string">"unquoted"</span><span class="plain"> </span><span class="reserved">of</span><span class="plain"> </span><span class="element">!element</span>
+
+    <span class="reserved">colouring</span><span class="plain"> </span><span class="reserved">{</span>
+    <span class="plain">	</span><span class="reserved">runs</span><span class="plain"> </span><span class="reserved">of</span><span class="plain"> </span><span class="element">!identifier</span><span class="plain"> </span><span class="reserved">{</span>
+    <span class="plain">		</span><span class="reserved">prefix</span><span class="plain"> </span><span class="string">"!"</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!element</span><span class="plain"> </span><span class="reserved">on</span><span class="plain"> </span><span class="reserved">both</span>
+    <span class="plain">		</span><span class="reserved">keyword</span><span class="plain"> </span><span class="reserved">of</span><span class="plain"> </span><span class="element">!element</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!element</span>
+    <span class="plain">		</span><span class="reserved">keyword</span><span class="plain"> </span><span class="reserved">of</span><span class="plain"> </span><span class="element">!reserved</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!reserved</span>
    <span class="plain">	</span><span class="reserved">}</span>
-    <span class="plain">	</span><span class="identifier">runs</span><span class="plain"> </span><span class="identifier">of</span><span class="plain"> </span><span class="element">unquoted</span><span class="plain"> </span><span class="reserved">{</span>
-    <span class="plain">		</span><span class="identifier">instances</span><span class="plain"> </span><span class="identifier">of</span><span class="plain"> </span><span class="string">"=&gt;"</span><span class="plain"> </span><span class="reserved">{</span>
-    <span class="plain">			</span><span class="reserved">=&gt;</span><span class="plain"> !</span><span class="element">reserved</span>
+    <span class="plain">	</span><span class="reserved">runs</span><span class="plain"> </span><span class="reserved">of</span><span class="plain"> </span><span class="element">unquoted</span><span class="plain"> </span><span class="reserved">{</span>
+    <span class="plain">		</span><span class="reserved">instances</span><span class="plain"> </span><span class="reserved">of</span><span class="plain"> </span><span class="string">"=&gt;"</span><span class="plain"> </span><span class="reserved">{</span>
+    <span class="plain">			</span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!reserved</span>
    <span class="plain">		</span><span class="reserved">}</span>
-    <span class="plain">		</span><span class="identifier">instances</span><span class="plain"> </span><span class="identifier">of</span><span class="plain"> </span><span class="string">"{"</span><span class="plain"> </span><span class="reserved">{</span>
-    <span class="plain">			</span><span class="reserved">=&gt;</span><span class="plain"> !</span><span class="element">reserved</span>
+    <span class="plain">		</span><span class="reserved">instances</span><span class="plain"> </span><span class="reserved">of</span><span class="plain"> </span><span class="string">"{"</span><span class="plain"> </span><span class="reserved">{</span>
+    <span class="plain">			</span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!reserved</span>
    <span class="plain">		</span><span class="reserved">}</span>
-    <span class="plain">		</span><span class="identifier">instances</span><span class="plain"> </span><span class="identifier">of</span><span class="plain"> </span><span class="string">"}"</span><span class="plain"> </span><span class="reserved">{</span>
-    <span class="plain">			</span><span class="reserved">=&gt;</span><span class="plain"> !</span><span class="element">reserved</span>
+    <span class="plain">		</span><span class="reserved">instances</span><span class="plain"> </span><span class="reserved">of</span><span class="plain"> </span><span class="string">"}"</span><span class="plain"> </span><span class="reserved">{</span>
+    <span class="plain">			</span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!reserved</span>
    <span class="plain">		</span><span class="reserved">}</span>
    <span class="plain">	</span><span class="reserved">}</span>
+    <span class="plain">	</span><span class="reserved">characters</span><span class="plain"> </span><span class="reserved">{</span>
+    <span class="plain">		#</span><span class="comment"> Anything left of these colours will be unquoted strings, so...</span>
+    <span class="plain">		</span><span class="reserved">coloured</span><span class="plain"> </span><span class="element">!constant</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!string</span>
+    <span class="plain">		</span><span class="reserved">coloured</span><span class="plain"> </span><span class="element">!identifier</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!string</span>
+    <span class="plain">		#</span><span class="comment"> Regular expressions, now coloured !character, are more like functions</span>
+    <span class="plain">		</span><span class="reserved">coloured</span><span class="plain"> </span><span class="element">!character</span><span class="plain"> </span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!function</span>
+    <span class="plain">	</span><span class="reserved">}</span>
+    <span class="plain">	#</span><span class="comment"> Detect Property: Value lines, not being fooled by a colon inside quotes</span>
+    <span class="plain">	</span><span class="reserved">brackets</span><span class="plain"> </span><span class="reserved">in</span><span class="plain"> </span><span class="functiontext">/\s*([A-Z][^"]*):.*/</span><span class="plain"> </span><span class="reserved">{</span>
+    <span class="plain">		#</span><span class="comment"> Uncolour only the bracketed part, i.e., the Property part</span>
+    <span class="plain">		</span><span class="reserved">=&gt;</span><span class="plain"> </span><span class="element">!plain</span>
+    <span class="plain">	</span><span class="reserved">}</span>
    <span class="reserved">}</span>

 </pre>
--- a/foundation-module/Chapter
+++ b/foundation-module/Chapter
@ -131,6 +131,7 @@ typedef struct match_results {
 	int no_matched_texts;
 	struct match_result exp_storage[MAX_BRACKETED_SUBEXPRESSIONS];
 	struct text_stream *exp[MAX_BRACKETED_SUBEXPRESSIONS];
+	int exp_at[MAX_BRACKETED_SUBEXPRESSIONS];
 } match_results;

@ Match result objects are inherently ephemeral, and we can expect to be
@ -143,8 +144,10 @@ deallocate.
 match_results Regexp::create_mr(void) {
 	match_results mr; /* built locally and returned by value; exp_storage is not set here */
 	mr.no_matched_texts = 0; /* no bracketed subexpressions matched yet */
-	for (int i=0; i<MAX_BRACKETED_SUBEXPRESSIONS; i++)
+	for (int i=0; i<MAX_BRACKETED_SUBEXPRESSIONS; i++) {
 		mr.exp[i] = NULL; /* no text stream for subexpression i yet */
+		mr.exp_at[i] = -1; /* -1 = no start position recorded for subexpression i */
+	}
 	return mr;
 }

@ -169,10 +172,27 @@ int Regexp::match(match_results *mr, text_stream *text, wchar_t *pattern) {
 	return rv;
 }

+int Regexp::match_from(match_results *mr, text_stream *text, wchar_t *pattern,
+	int x, int allow_partial) {
+	int match_to = x; /* try pattern against text from index x; default leaves the result at 0 */
+	if (x < Str::len(text)) { /* no attempt is made when x is at or beyond the end of text */
+		if (mr) Regexp::prepare(mr); /* mr may be NULL; when supplied it is reset first */
+		match_position at;
+		at.tpos = x; at.ppos = 0; at.bc = 0; at.bl = 0; /* start at text index x, pattern index 0 */
+		match_to = Regexp::match_r(mr, text, pattern, &at, allow_partial); /* allow_partial is passed through unchanged */
+		if (match_to == -1) { /* -1 from match_r is treated as "no match" */
+			match_to = x; /* so that the function returns 0 */
+			if (mr) Regexp::dispose_of(mr); /* discard whatever prepare set up */
+		}
+	}
+	return match_to - x; /* presumably the count of characters matched from x; confirm against match_r */
+}
+
 void Regexp::prepare(match_results *mr) {
 	if (mr) {
 		mr->no_matched_texts = 0;
 		for (int i=0; i<MAX_BRACKETED_SUBEXPRESSIONS; i++) {
+			mr->exp_at[i] = -1;
 			if (mr->exp[i]) STREAM_CLOSE(mr->exp[i]);
 			mr->exp_storage[i].match_text_struct =
 				Streams::new_buffer(
@ -276,6 +296,7 @@ to implement numeric repetition counts, which we won't need:
 			Str::clear(mr->exp[i]);
 			for (int j = at.brackets_start[i]; j <= at.brackets_end[i]; j++)
 				PUT_TO(mr->exp[i], Str::get_at(text, j));
+			mr->exp_at[i] = at.brackets_start[i];
 		}
 		mr->no_matched_texts = at.bc;
 	}
@ -290,8 +311,9 @@ says |q|, the only match is with a lower-case letter "q"), except that:
 (e) |%i| means any character from the identifier class (see above);
 (f) |%p| means any character which can be used in the name of a Preform
 nonterminal, which is to say, an identifier character or a hyphen;
-(g) |%P| means the same or else a colon.
-(h) |%t| means a tab.
+(g) |%P| means the same or else a colon;
+(h) |%t| means a tab;
+(i) |%q| means a double-quote.

 |%| otherwise makes a literal escape; a space means any whitespace character;
 square brackets enclose literal alternatives, and note as usual with grep
@ -330,9 +352,10 @@ int Regexp::get_cclass(wchar_t *pattern, int ppos, int *len, int *from, int *to,
 			}
 			*from = ppos; *to = ppos; return LITERAL_CLASS;
 		case '[':
-			*from = ppos+2;
+			*from = ppos+1;
+			ppos += 2;
 			while ((pattern[ppos]) && (pattern[ppos] != ']')) ppos++;
-			*to = ppos - 1; *len = ppos - *from + 1;
+			*to = ppos - 1; *len = ppos - *from + 2;
 			return LITERAL_CLASS;
 		case ' ':
 			*len = 1; return WHITESPACE_CLASS;
@ -358,6 +381,9 @@ int Regexp::test_cclass(int c, int chcl, int range_from, int range_to, wchar_t *
 			((c >= 'a') && (c <= 'z')) ||
 			((c >= '0') && (c <= '9'))) match = TRUE; break;
 		case LITERAL_CLASS:
+			if ((range_to > range_from) && (drawn_from[range_from] == '^')) {
+				range_from++; reverse = reverse?FALSE:TRUE;
+			}
 			for (int j = range_from; j <= range_to; j++) {
 				int c1 = drawn_from[j], c2 = c1;
 				if ((j+1 < range_to) && (drawn_from[j+1] == '-')) { c2 = drawn_from[j+2]; j += 2; }