[Indexer::] The Indexer. To construct indexes of the material woven, following a template. @h Cover sheets. The indexer offers two basic services. One, which is much simpler, makes cover sheets, and has only simple escapes (except that it has the ability to call the fuller indexing service if need be, using |[[Template T]]|). = void Indexer::cover_sheet_maker(OUTPUT_STREAM, web *W, text_stream *unextended_leafname, weave_target *wt, int halves) { cover_sheet_state state; @; TEMPORARY_TEXT(extended_leafname); WRITE_TO(extended_leafname, "%S%S", unextended_leafname, Formats::file_extension(wt->format)); filename *cs_filename = Patterns::obtain_filename(wt->pattern, extended_leafname); DISCARD_TEXT(extended_leafname); TextFiles::read(cs_filename, FALSE, "can't open cover sheet file", TRUE, Indexer::scan_cover_line, NULL, (void *) &state); } @ The cover-sheet-maker has the ability to weave only the top half, or only the bottom half, of the template; they are divided by the marker |[[Code]]|. @d WEAVE_FIRST_HALF 1 @d WEAVE_SECOND_HALF 2 @d IN_SECOND_HALF 4 = typedef struct cover_sheet_state { struct text_stream *WEAVE_COVER_TO; int halves; /* a bitmap of the above values */ struct weave_target *target; } cover_sheet_state; @ = state.halves = halves; state.WEAVE_COVER_TO = OUT; state.target = wt; @ The above, then, iterates the following routine on each line of the template file one by one, passing it a pointer to an instance of the above state structure. 
= void Indexer::scan_cover_line(text_stream *line, text_file_position *tfp, void *v_state) { cover_sheet_state *state = (cover_sheet_state *) v_state; text_stream *OUT = state->WEAVE_COVER_TO; int include = FALSE; if (((state->halves & WEAVE_FIRST_HALF) && ((state->halves & IN_SECOND_HALF) == 0)) || ((state->halves & WEAVE_SECOND_HALF) && (state->halves & IN_SECOND_HALF))) include = TRUE; TEMPORARY_TEXT(matter); Str::copy(matter, line); match_results mr = Regexp::create_mr(); if ((include) && ((state->target->self_contained) || (state->target->pattern->embed_CSS)) && (Regexp::match(&mr, matter, L" *%target->pattern, mr.exp[0]); Indexer::transcribe_CSS(matter, CSS_file); } else { while (Regexp::match(&mr, matter, L"(%c*?)%[%[(%c*?)%]%](%c*)")) { text_stream *left = mr.exp[0]; text_stream *command = mr.exp[1]; text_stream *right = mr.exp[2]; if (include) WRITE("%S", left); @; Str::copy(matter, right); } } Regexp::dispose_of(&mr); if (include) WRITE("%S\n", matter); DISCARD_TEXT(matter); } @ = match_results mr2 = Regexp::create_mr(); if (Str::eq_wide_string(command, L"Code")) { state->halves |= IN_SECOND_HALF; } else if (Str::eq_wide_string(command, L"Cover Sheet")) { if (include) @; } else if (Regexp::match(&mr2, command, L"Template (%c*?)")) { if (include) @; } else if (Bibliographic::data_exists(state->target->weave_web, command)) { if (include) @; } else { if (include) WRITE("%S", command); } Regexp::dispose_of(&mr2); @ = if (state->target->pattern->based_on) { weave_pattern *saved = state->target->pattern; state->target->pattern = state->target->pattern->based_on; Indexer::cover_sheet_maker(OUT, state->target->weave_web, I"cover-sheet", state->target, (state->halves & (WEAVE_FIRST_HALF + WEAVE_SECOND_HALF))); state->target->pattern = saved; } else { Errors::in_text_file("cover sheet recursively includes itself", tfp); } @ = filename *CF = Patterns::obtain_filename(state->target->pattern, mr2.exp[0]); if (CF == NULL) Errors::in_text_file("pattern does not provide 
this template file", tfp); else Indexer::run(state->target->weave_web, state->target->weave_range, CF, NULL, OUT, state->target->pattern); @ = WRITE("%S", Bibliographic::get_datum(state->target->weave_web, command)); @h Full index pages. This is a much more substantial service, and operates as a little processor interpreting a meta-language all of its very own, with a stack for holding nested repeat loops, and a program counter and -- well, and nothing else to speak of, in fact, except for the slightly unusual way that loop variables provide context by changing the subject of what is discussed rather than by being accessed directly. The current state of the processor is recorded in the following. @d MAX_TEMPLATE_LINES 256 /* maximum number of lines in template */ @d CI_STACK_CAPACITY 8 /* maximum recursion of chapter/section iteration */ = typedef struct contents_processor { text_stream *tlines[MAX_TEMPLATE_LINES]; int no_tlines; int repeat_stack_level[CI_STACK_CAPACITY]; linked_list_item *repeat_stack_variable[CI_STACK_CAPACITY]; linked_list_item *repeat_stack_threshold[CI_STACK_CAPACITY]; int repeat_stack_startpos[CI_STACK_CAPACITY]; int stack_pointer; /* And this is our stack pointer for tracking of loops */ text_stream *restrict_to_range; } contents_processor; contents_processor Indexer::new_processor(text_stream *range) { contents_processor cp; cp.no_tlines = 0; cp.restrict_to_range = Str::duplicate(range); cp.stack_pointer = 0; return cp; } @h Running the interpreter. 
@d TRACE_CI_EXECUTION FALSE /* set true for debugging */

=
/* Run the template interpreter. A processor is created for |range|, and the
   stored template lines are then stepped through in order, each finished line
   being written to OUT. OUT starts out pointing at the local TO_struct, and is
   closed at the end only when |write_to| is NULL. */
void Indexer::run(web *W, text_stream *range,
	filename *template_filename, text_stream *contents_page_leafname,
	text_stream *write_to, weave_pattern *pattern) {
	contents_processor actual_cp = Indexer::new_processor(range);
	contents_processor *cp = &actual_cp;
	text_stream TO_struct; text_stream *OUT = &TO_struct;
	/* NOTE(review): the names of the two paragraphs invoked next are missing
	   from this copy of the source; restore them from the upstream file */
	@;
	@;

	int lpos = 0; /* This is our program counter: a line number in the template */
	while (lpos < cp->no_tlines) {
		match_results mr = Regexp::create_mr();
		TEMPORARY_TEXT(tl);
		Str::copy(tl, cp->tlines[lpos++]); /* Fetch the line at the program counter and advance */
		@;
		WRITE("%S\n", tl); /* Copy the now finished line to the output */
		DISCARD_TEXT(tl);
		/* NOTE(review): the label below comes after DISCARD_TEXT(tl), so any
		   goto CYCLE taken while processing the line skips both the output
		   of the line and that discard; confirm the skipped discard is harmless */
		CYCLE: ;
		Regexp::dispose_of(&mr);
	}
	if (write_to == NULL) STREAM_CLOSE(OUT);
}

@ =
	if (Regexp::match(&mr, tl, L"(%c*?) ")) Str::copy(tl, mr.exp[0]); /* Strip trailing spaces */
	if (TRACE_CI_EXECUTION) @;
	/* NOTE(review): the statement below is truncated in this copy of the source.
	   Its wide-string pattern is never closed, and the code between the pattern
	   and the surviving paragraph invocations is missing, including whatever
	   declares the text called command which is discarded below. Restore this
	   passage from the upstream file before tangling. */
	if ((pattern->embed_CSS) && (Regexp::match(&mr, tl, L" *%; @; @; DISCARD_TEXT(command); } @; @;

@h File handling.
@ = TextFiles::read(template_filename, FALSE, "can't find contents template", TRUE, Indexer::save_template_line, NULL, cp); if (TRACE_CI_EXECUTION) PRINT("Read template <%f>: %d line(s)\n", template_filename, cp->no_tlines); @ With the following iterator: = void Indexer::save_template_line(text_stream *line, text_file_position *tfp, void *void_cp) { contents_processor *cp = (contents_processor *) void_cp; if (cp->no_tlines < MAX_TEMPLATE_LINES) cp->tlines[cp->no_tlines++] = Str::duplicate(line); } @ = pathname *H = W->redirect_weaves_to; if (H == NULL) H = Reader::woven_folder(W); if (write_to) OUT = write_to; else { filename *Contents = Filenames::in_folder(H, contents_page_leafname); if (STREAM_OPEN_TO_FILE(OUT, Contents, ISO_ENC) == FALSE) Errors::fatal_with_file("unable to write contents file", Contents); if (W->as_ebook) Epub::note_page(W->as_ebook, Contents, I"Index", I"index"); PRINT("[Index file: %f]\n", Contents); } @h The repeat stack and loops. @ = PRINT("%04d: %S\nStack:", lpos-1, tl); for (int j=0; jstack_pointer; j++) { if (cp->repeat_stack_level[j] == CHAPTER_LEVEL) PRINT(" %d: %S/%S", j, ((chapter *) CONTENT_IN_ITEM(cp->repeat_stack_variable[j], chapter))->ch_range, ((chapter *) CONTENT_IN_ITEM(cp->repeat_stack_threshold[j], chapter))->ch_range); else if (cp->repeat_stack_level[j] == SECTION_LEVEL) PRINT(" %d: %S/%S", j, ((section *) CONTENT_IN_ITEM(cp->repeat_stack_variable[j], section))->range, ((section *) CONTENT_IN_ITEM(cp->repeat_stack_threshold[j], section))->range); } PRINT("\n"); @ We start the direct commands with Select, which is implemented as a one-iteration loop in which the loop variable has the given section or chapter as its value during the sole iteration. 
@ = match_results mr = Regexp::create_mr(); if (Regexp::match(&mr, command, L"Select (%c*)")) { chapter *C; section *S; LOOP_OVER_LINKED_LIST(C, chapter, W->chapters) LOOP_OVER_LINKED_LIST(S, section, C->sections) if (Str::eq(S->range, mr.exp[0])) { Indexer::start_CI_loop(cp, SECTION_LEVEL, S_item, S_item, lpos); Regexp::dispose_of(&mr); goto CYCLE; } LOOP_OVER_LINKED_LIST(C, chapter, W->chapters) if (Str::eq(C->ch_range, mr.exp[0])) { Indexer::start_CI_loop(cp, CHAPTER_LEVEL, C_item, C_item, lpos); Regexp::dispose_of(&mr); goto CYCLE; } Errors::at_position("don't recognise the chapter or section abbreviation range", template_filename, lpos); Regexp::dispose_of(&mr); goto CYCLE; } @ Next, a genuine loop beginning: @ = int loop_level = 0; if (Regexp::match(&mr, command, L"Repeat Chapter")) loop_level = CHAPTER_LEVEL; if (Regexp::match(&mr, command, L"Repeat Section")) loop_level = SECTION_LEVEL; if (loop_level != 0) { linked_list_item *from = NULL, *to = NULL; linked_list_item *CI = FIRST_ITEM_IN_LINKED_LIST(chapter, W->chapters); while ((CI) && (CONTENT_IN_ITEM(CI, chapter)->imported)) CI = NEXT_ITEM_IN_LINKED_LIST(CI, chapter); if (loop_level == CHAPTER_LEVEL) { from = CI; to = LAST_ITEM_IN_LINKED_LIST(chapter, W->chapters); if (Str::eq_wide_string(cp->restrict_to_range, L"0") == FALSE) { chapter *C; LOOP_OVER_LINKED_LIST(C, chapter, W->chapters) if (Str::eq(C->ch_range, cp->restrict_to_range)) { from = C_item; to = from; break; } } } if (loop_level == SECTION_LEVEL) { chapter *within_chapter = CONTENT_IN_ITEM(Indexer::heading_topmost_on_stack(cp, CHAPTER_LEVEL), chapter); if (within_chapter == NULL) { if (CI) { chapter *C = CONTENT_IN_ITEM(CI, chapter); from = FIRST_ITEM_IN_LINKED_LIST(section, C->sections); } chapter *LC = LAST_IN_LINKED_LIST(chapter, W->chapters); if (LC) to = LAST_ITEM_IN_LINKED_LIST(section, LC->sections); } else { from = FIRST_ITEM_IN_LINKED_LIST(section, within_chapter->sections); to = LAST_ITEM_IN_LINKED_LIST(section, 
within_chapter->sections); } } if (from) Indexer::start_CI_loop(cp, loop_level, from, to, lpos); goto CYCLE; } @ And at the other bookend: @ = if ((Regexp::match(&mr, command, L"End Repeat")) || (Regexp::match(&mr, command, L"End Select"))) { if (cp->stack_pointer <= 0) Errors::at_position("stack underflow on contents template", template_filename, lpos); if (cp->repeat_stack_level[cp->stack_pointer-1] == SECTION_LEVEL) { linked_list_item *SI = cp->repeat_stack_variable[cp->stack_pointer-1]; if ((SI == cp->repeat_stack_threshold[cp->stack_pointer-1]) || (NEXT_ITEM_IN_LINKED_LIST(SI, section) == NULL)) Indexer::end_CI_loop(cp); else { cp->repeat_stack_variable[cp->stack_pointer-1] = NEXT_ITEM_IN_LINKED_LIST(SI, section); lpos = cp->repeat_stack_startpos[cp->stack_pointer-1]; /* Back round loop */ } } else { linked_list_item *CI = cp->repeat_stack_variable[cp->stack_pointer-1]; if (CI == cp->repeat_stack_threshold[cp->stack_pointer-1]) Indexer::end_CI_loop(cp); else { cp->repeat_stack_variable[cp->stack_pointer-1] = NEXT_ITEM_IN_LINKED_LIST(CI, chapter); lpos = cp->repeat_stack_startpos[cp->stack_pointer-1]; /* Back round loop */ } } goto CYCLE; } @ It can happen that a section loop, at least, is empty: @ = for (int rstl = cp->stack_pointer-1; rstl >= 0; rstl--) if (cp->repeat_stack_level[cp->stack_pointer-1] == SECTION_LEVEL) { linked_list_item *SI = cp->repeat_stack_threshold[cp->stack_pointer-1]; if (NEXT_ITEM_IN_LINKED_LIST(SI, section) == cp->repeat_stack_variable[cp->stack_pointer-1]) goto CYCLE; } @ If called with level |CHAPTER_LEVEL|, this returns the topmost chapter number on the stack; and similarly for |SECTION_LEVEL|. 
@d CHAPTER_LEVEL 1 @d SECTION_LEVEL 2 = linked_list_item *Indexer::heading_topmost_on_stack(contents_processor *cp, int level) { for (int rstl = cp->stack_pointer-1; rstl >= 0; rstl--) if (cp->repeat_stack_level[rstl] == level) return cp->repeat_stack_variable[rstl]; return NULL; } @ This is the code for starting a loop, which stacks up the details, and similarly for ending it by popping them again: = void Indexer::start_CI_loop(contents_processor *cp, int level, linked_list_item *from, linked_list_item *to, int pos) { if (cp->stack_pointer < CI_STACK_CAPACITY) { cp->repeat_stack_level[cp->stack_pointer] = level; cp->repeat_stack_variable[cp->stack_pointer] = from; cp->repeat_stack_threshold[cp->stack_pointer] = to; cp->repeat_stack_startpos[cp->stack_pointer++] = pos; } } void Indexer::end_CI_loop(contents_processor *cp) { cp->stack_pointer--; } @h Variable substitutions. We can now forget about this tiny stack machine: the one task left is to take a line from the template, and make substitutions of variables into its square-bracketed parts. 
@ = int slen, spos; while ((spos = Regexp::find_expansion(tl, '[', '[', ']', ']', &slen)) >= 0) { TEMPORARY_TEXT(left_part); TEMPORARY_TEXT(varname); TEMPORARY_TEXT(right_part); Str::substr(left_part, Str::start(tl), Str::at(tl, spos)); Str::substr(varname, Str::at(tl, spos+2), Str::at(tl, spos+slen-2)); Str::substr(right_part, Str::at(tl, spos+slen), Str::end(tl)); TEMPORARY_TEXT(substituted); match_results mr = Regexp::create_mr(); if (Bibliographic::data_exists(W, varname)) { @; } else if (Regexp::match(&mr, varname, L"Chapter (%c+)")) { text_stream *detail = mr.exp[0]; chapter *C = CONTENT_IN_ITEM( Indexer::heading_topmost_on_stack(cp, CHAPTER_LEVEL), chapter); if (C == NULL) Errors::at_position("no chapter is currently selected", template_filename, lpos); else @; } else if (Regexp::match(&mr, varname, L"Section (%c+)")) { text_stream *detail = mr.exp[0]; section *S = CONTENT_IN_ITEM( Indexer::heading_topmost_on_stack(cp, SECTION_LEVEL), section); if (S == NULL) Errors::at_position("no section is currently selected", template_filename, lpos); else @; } else if (Regexp::match(&mr, varname, L"Complete (%c+)")) { text_stream *detail = mr.exp[0]; @; } else { WRITE_TO(substituted, "%S", varname); } Str::clear(tl); WRITE_TO(tl, "%S%S%S", left_part, substituted, right_part); Regexp::dispose_of(&mr); DISCARD_TEXT(left_part); DISCARD_TEXT(varname); DISCARD_TEXT(substituted); DISCARD_TEXT(right_part); } @ This is why, for instance, |[[Author]]| is replaced by the author's name: @ = Str::copy(substituted, Bibliographic::get_datum(W, varname)); @ We store little about the complete-web-in-one-file PDF: @ = if (swarm_leader) if (Formats::substitute_post_processing_data(substituted, swarm_leader, detail, pattern) == FALSE) WRITE_TO(substituted, "%S for complete web", detail); @ And here for Chapters: @ = if (Str::eq_wide_string(detail, L"Title")) { Str::copy(substituted, C->ch_title); } else if (Str::eq_wide_string(detail, L"Code")) { Str::copy(substituted, C->ch_range); } 
else if (Str::eq_wide_string(detail, L"Purpose")) { Str::copy(substituted, C->rubric); } else if (Formats::substitute_post_processing_data(substituted, C->ch_weave, detail, pattern)) { ; } else { WRITE_TO(substituted, "%S for %S", varname, C->ch_title); } @ And this, finally, is a very similar construction for Sections. @ = if (Str::eq_wide_string(detail, L"Title")) { Str::copy(substituted, S->sect_title); } else if (Str::eq_wide_string(detail, L"Purpose")) { Str::copy(substituted, S->sect_purpose); } else if (Str::eq_wide_string(detail, L"Code")) { Str::copy(substituted, S->range); } else if (Str::eq_wide_string(detail, L"Lines")) { WRITE_TO(substituted, "%d", S->sect_extent); } else if (Str::eq_wide_string(detail, L"Source")) { WRITE_TO(substituted, "%f", S->source_file_for_section); } else if (Str::eq_wide_string(detail, L"Page")) { TEMPORARY_TEXT(linkto); Str::copy(linkto, S->range); LOOP_THROUGH_TEXT(P, linkto) if ((Str::get(P) == '/') || (Str::get(P) == ' ')) Str::put(P, '-'); WRITE_TO(linkto, ".html"); Str::copy(substituted, linkto); DISCARD_TEXT(linkto); } else if (Str::eq_wide_string(detail, L"Paragraphs")) { WRITE_TO(substituted, "%d", S->sect_paragraphs); } else if (Str::eq_wide_string(detail, L"Mean")) { int denom = S->sect_paragraphs; if (denom == 0) denom = 1; WRITE_TO(substituted, "%d", S->sect_extent/denom); } else if (Formats::substitute_post_processing_data(substituted, S->sect_weave, detail, pattern)) { ; } else { WRITE_TO(substituted, "%S for %S", varname, S->sect_title); } @h Transcribing CSS. = void Indexer::transcribe_CSS(OUTPUT_STREAM, filename *CSS_file) { WRITE("\n"); } void Indexer::copy_CSS(text_stream *line, text_file_position *tfp, void *X) { text_stream *OUT = (text_stream *) X; WRITE("%S\n", line); }