Implement support for Ambiguous = Wide (#406)
Some checks are pending
CI / check (ubuntu-latest) (push) Waiting to run
CI / check (windows-latest) (push) Waiting to run

Does what it says on the tin. It's just a lot in the can.

Closes #115
This commit is contained in:
Leonard Hecker 2025-06-04 23:39:56 +02:00 committed by GitHub
parent db1e813603
commit f8bea2be19
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 1125 additions and 994 deletions

View file

@ -199,6 +199,13 @@ impl DocumentManager {
Ok(self.list.front_mut().unwrap()) Ok(self.list.front_mut().unwrap())
} }
/// Recomputes the layout of every document held by this manager.
///
/// Walks `self.list` and calls `reflow()` on each document's text buffer.
/// Each buffer is borrowed mutably only for the duration of its own
/// `reflow()` call.
///
/// NOTE(review): `buffer` is presumably a `RefCell`-like cell, in which case
/// this panics if a buffer is already borrowed elsewhere — confirm at call sites.
pub fn reflow_all(&self) {
    for document in &self.list {
        document.buffer.borrow_mut().reflow();
    }
}
pub fn open_for_reading(path: &Path) -> apperr::Result<File> { pub fn open_for_reading(path: &Path) -> apperr::Result<File> {
File::open(path).map_err(apperr::Error::from) File::open(path).map_err(apperr::Error::from)
} }

View file

@ -23,12 +23,12 @@ use draw_menubar::*;
use draw_statusbar::*; use draw_statusbar::*;
use edit::arena::{self, Arena, ArenaString, scratch_arena}; use edit::arena::{self, Arena, ArenaString, scratch_arena};
use edit::framebuffer::{self, IndexedColor}; use edit::framebuffer::{self, IndexedColor};
use edit::helpers::{KIBI, MEBI, MetricFormatter, Rect, Size}; use edit::helpers::{CoordType, KIBI, MEBI, MetricFormatter, Rect, Size};
use edit::input::{self, kbmod, vk}; use edit::input::{self, kbmod, vk};
use edit::oklab::oklab_blend; use edit::oklab::oklab_blend;
use edit::tui::*; use edit::tui::*;
use edit::vt::{self, Token}; use edit::vt::{self, Token};
use edit::{apperr, arena_format, base64, path, sys}; use edit::{apperr, arena_format, base64, path, sys, unicode};
use localization::*; use localization::*;
use state::*; use state::*;
@ -79,7 +79,7 @@ fn run() -> apperr::Result<()> {
let mut input_parser = input::Parser::new(); let mut input_parser = input::Parser::new();
let mut tui = Tui::new()?; let mut tui = Tui::new()?;
let _restore = setup_terminal(&mut tui, &mut vt_parser); let _restore = setup_terminal(&mut tui, &mut state, &mut vt_parser);
state.menubar_color_bg = oklab_blend( state.menubar_color_bg = oklab_blend(
tui.indexed(IndexedColor::Background), tui.indexed(IndexedColor::Background),
@ -502,7 +502,7 @@ impl Drop for RestoreModes {
} }
} }
fn setup_terminal(tui: &mut Tui, vt_parser: &mut vt::Parser) -> RestoreModes { fn setup_terminal(tui: &mut Tui, state: &mut State, vt_parser: &mut vt::Parser) -> RestoreModes {
sys::write_stdout(concat!( sys::write_stdout(concat!(
// 1049: Alternative Screen Buffer // 1049: Alternative Screen Buffer
// I put the ASB switch in the beginning, just in case the terminal performs // I put the ASB switch in the beginning, just in case the terminal performs
@ -517,6 +517,12 @@ fn setup_terminal(tui: &mut Tui, vt_parser: &mut vt::Parser) -> RestoreModes {
"\x1b]4;8;?;9;?;10;?;11;?;12;?;13;?;14;?;15;?\x07", "\x1b]4;8;?;9;?;10;?;11;?;12;?;13;?;14;?;15;?\x07",
// OSC 10 and 11 queries for the current foreground and background colors. // OSC 10 and 11 queries for the current foreground and background colors.
"\x1b]10;?\x07\x1b]11;?\x07", "\x1b]10;?\x07\x1b]11;?\x07",
// Test whether ambiguous width characters are two columns wide.
// We use "…", because it's the most common ambiguous width character we use,
// and the old Windows conhost doesn't actually use wcwidth, it measures the
// actual display width of the character and assigns it columns accordingly.
// We detect it by writing the character and asking for the cursor position.
"\r…\x1b[6n",
// CSI c reports the terminal capabilities. // CSI c reports the terminal capabilities.
// It also helps us to detect the end of the responses, because not all // It also helps us to detect the end of the responses, because not all
// terminals support the OSC queries, but all of them support CSI c. // terminals support the OSC queries, but all of them support CSI c.
@ -527,6 +533,7 @@ fn setup_terminal(tui: &mut Tui, vt_parser: &mut vt::Parser) -> RestoreModes {
let mut osc_buffer = String::new(); let mut osc_buffer = String::new();
let mut indexed_colors = framebuffer::DEFAULT_THEME; let mut indexed_colors = framebuffer::DEFAULT_THEME;
let mut color_responses = 0; let mut color_responses = 0;
let mut ambiguous_width = 1;
while !done { while !done {
let scratch = scratch_arena(None); let scratch = scratch_arena(None);
@ -537,7 +544,12 @@ fn setup_terminal(tui: &mut Tui, vt_parser: &mut vt::Parser) -> RestoreModes {
let mut vt_stream = vt_parser.parse(&input); let mut vt_stream = vt_parser.parse(&input);
while let Some(token) = vt_stream.next() { while let Some(token) = vt_stream.next() {
match token { match token {
Token::Csi(state) if state.final_byte == 'c' => done = true, Token::Csi(csi) => match csi.final_byte {
'c' => done = true,
// CPR (Cursor Position Report) response.
'R' => ambiguous_width = csi.params[1] as CoordType - 1,
_ => {}
},
Token::Osc { mut data, partial } => { Token::Osc { mut data, partial } => {
if partial { if partial {
osc_buffer.push_str(data); osc_buffer.push_str(data);
@ -594,6 +606,11 @@ fn setup_terminal(tui: &mut Tui, vt_parser: &mut vt::Parser) -> RestoreModes {
} }
} }
if ambiguous_width == 2 {
unicode::setup_ambiguous_width(2);
state.documents.reflow_all();
}
if color_responses == indexed_colors.len() { if color_responses == indexed_colors.len() {
tui.setup_indexed_colors(indexed_colors); tui.setup_indexed_colors(indexed_colors);
} }

View file

@ -427,7 +427,7 @@ impl TextBuffer {
false false
} else { } else {
self.margin_enabled = enabled; self.margin_enabled = enabled;
self.reflow(true); self.reflow();
true true
} }
} }
@ -482,7 +482,7 @@ impl TextBuffer {
false false
} else { } else {
self.width = width; self.width = width;
self.reflow(true); self.reflow();
true true
} }
} }
@ -499,7 +499,7 @@ impl TextBuffer {
false false
} else { } else {
self.tab_size = width; self.tab_size = width;
self.reflow(true); self.reflow();
true true
} }
} }
@ -524,7 +524,7 @@ impl TextBuffer {
self.ruler = column; self.ruler = column;
} }
fn reflow(&mut self, force: bool) { pub fn reflow(&mut self) {
// +1 onto logical_lines, because line numbers are 1-based. // +1 onto logical_lines, because line numbers are 1-based.
// +1 onto log10, because we want the digit width and not the actual log10. // +1 onto log10, because we want the digit width and not the actual log10.
// +3 onto log10, because we append " | " to the line numbers to form the margin. // +3 onto log10, because we append " | " to the line numbers to form the margin.
@ -536,25 +536,26 @@ impl TextBuffer {
let text_width = self.text_width(); let text_width = self.text_width();
// 2 columns are required, because otherwise wide glyphs wouldn't ever fit. // 2 columns are required, because otherwise wide glyphs wouldn't ever fit.
let word_wrap_column = self.word_wrap_column =
if self.word_wrap_enabled && text_width >= 2 { text_width } else { 0 }; if self.word_wrap_enabled && text_width >= 2 { text_width } else { 0 };
if force || self.word_wrap_column > word_wrap_column { // Recalculate the cursor position.
self.word_wrap_column = word_wrap_column; self.cursor = self.cursor_move_to_logical_internal(
if self.word_wrap_column > 0 {
if self.cursor.offset != 0 { Default::default()
self.cursor = self } else {
.cursor_move_to_logical_internal(Default::default(), self.cursor.logical_pos); self.goto_line_start(self.cursor, self.cursor.logical_pos.y)
} },
self.cursor.logical_pos,
);
// Recalculate the line statistics. // Recalculate the line statistics.
if self.word_wrap_enabled { if self.word_wrap_column > 0 {
let end = self.cursor_move_to_logical_internal(self.cursor, Point::MAX); let end = self.cursor_move_to_logical_internal(self.cursor, Point::MAX);
self.stats.visual_lines = end.visual_pos.y + 1; self.stats.visual_lines = end.visual_pos.y + 1;
} else { } else {
self.stats.visual_lines = self.stats.logical_lines; self.stats.visual_lines = self.stats.logical_lines;
} }
}
self.cursor_for_rendering = None; self.cursor_for_rendering = None;
} }
@ -583,7 +584,7 @@ impl TextBuffer {
self.set_selection(None); self.set_selection(None);
self.search = None; self.search = None;
self.mark_as_clean(); self.mark_as_clean();
self.reflow(true); self.reflow();
} }
/// Copies the contents of the buffer into a string. /// Copies the contents of the buffer into a string.
@ -2312,9 +2313,7 @@ impl TextBuffer {
} }
self.search = None; self.search = None;
self.cursor_for_rendering = None;
// Also takes care of clearing `cursor_for_rendering`.
self.reflow(false);
} }
/// Undo the last edit operation. /// Undo the last edit operation.
@ -2428,8 +2427,7 @@ impl TextBuffer {
} }
} }
// Also takes care of clearing `cursor_for_rendering`. self.cursor_for_rendering = None;
self.reflow(false);
} }
/// For interfacing with ICU. /// For interfacing with ICU.

View file

@ -9,6 +9,25 @@ use crate::document::ReadableDocument;
use crate::helpers::{CoordType, Point}; use crate::helpers::{CoordType, Point};
use crate::simd::{memchr2, memrchr2}; use crate::simd::{memchr2, memrchr2};
// On one hand it's disgusting that I wrote this as a global variable, but on the
// other hand, this isn't a public library API, and it makes the code a lot cleaner,
// because we don't need to inject this once-per-process value everywhere.
//
// This is an atomic instead of a `static mut`, so that reads and writes are
// well-defined without any `unsafe`, no matter which thread performs them.
// `Relaxed` is sufficient, because no other memory is published through this value,
// and a relaxed load compiles down to a plain load on all relevant targets.
static AMBIGUOUS_WIDTH: std::sync::atomic::AtomicUsize = std::sync::atomic::AtomicUsize::new(1);

/// Sets the width of "ambiguous" width characters as per "UAX #11: East Asian Width".
///
/// The only meaningful widths are 1 (narrow) and 2 (wide).
/// Any other value is clamped into that range, so that a bogus measurement
/// (e.g. a zero or negative column delta) can't turn into an absurd cell width.
///
/// Defaults to 1.
pub fn setup_ambiguous_width(ambiguous_width: CoordType) {
    // The clamp guarantees a small positive value, which makes the cast lossless.
    let width = ambiguous_width.clamp(1, 2) as usize;
    AMBIGUOUS_WIDTH.store(width, std::sync::atomic::Ordering::Relaxed);
}

/// Returns the number of columns currently assigned to ambiguous width characters.
#[inline]
fn ambiguous_width() -> usize {
    AMBIGUOUS_WIDTH.load(std::sync::atomic::Ordering::Relaxed)
}
/// Stores a position inside a [`ReadableDocument`]. /// Stores a position inside a [`ReadableDocument`].
/// ///
/// The cursor tracks both the absolute byte-offset, /// The cursor tracks both the absolute byte-offset,
@ -40,16 +59,25 @@ pub struct Cursor {
/// Your entrypoint to navigating inside a [`ReadableDocument`]. /// Your entrypoint to navigating inside a [`ReadableDocument`].
#[derive(Clone)] #[derive(Clone)]
pub struct MeasurementConfig<'doc> { pub struct MeasurementConfig<'doc> {
buffer: &'doc dyn ReadableDocument, cursor: Cursor,
tab_size: CoordType, tab_size: CoordType,
word_wrap_column: CoordType, word_wrap_column: CoordType,
cursor: Cursor, buffer: &'doc dyn ReadableDocument,
} }
impl<'doc> MeasurementConfig<'doc> { impl<'doc> MeasurementConfig<'doc> {
/// Creates a new [`MeasurementConfig`] for the given document. /// Creates a new [`MeasurementConfig`] for the given document.
pub fn new(buffer: &'doc dyn ReadableDocument) -> Self { pub fn new(buffer: &'doc dyn ReadableDocument) -> Self {
Self { buffer, tab_size: 8, word_wrap_column: 0, cursor: Default::default() } Self { cursor: Default::default(), tab_size: 8, word_wrap_column: 0, buffer }
}
/// Sets the initial cursor to the given position.
///
/// WARNING: While the code doesn't panic if the cursor is invalid,
/// the results will obviously be complete garbage.
pub fn with_cursor(mut self, cursor: Cursor) -> Self {
self.cursor = cursor;
self
} }
/// Sets the tab size. /// Sets the tab size.
@ -68,31 +96,13 @@ impl<'doc> MeasurementConfig<'doc> {
self self
} }
/// Sets the initial cursor to the given position.
///
/// WARNING: While the code doesn't panic if the cursor is invalid,
/// the results will obviously be complete garbage.
pub fn with_cursor(mut self, cursor: Cursor) -> Self {
self.cursor = cursor;
self
}
/// Navigates **forward** to the given absolute offset. /// Navigates **forward** to the given absolute offset.
/// ///
/// # Returns /// # Returns
/// ///
/// The cursor position after the navigation. /// The cursor position after the navigation.
pub fn goto_offset(&mut self, offset: usize) -> Cursor { pub fn goto_offset(&mut self, offset: usize) -> Cursor {
self.cursor = Self::measure_forward( self.measure_forward(offset, Point::MAX, Point::MAX)
self.tab_size,
self.word_wrap_column,
offset,
Point::MAX,
Point::MAX,
self.cursor,
self.buffer,
);
self.cursor
} }
/// Navigates **forward** to the given logical position. /// Navigates **forward** to the given logical position.
@ -103,16 +113,7 @@ impl<'doc> MeasurementConfig<'doc> {
/// ///
/// The cursor position after the navigation. /// The cursor position after the navigation.
pub fn goto_logical(&mut self, logical_target: Point) -> Cursor { pub fn goto_logical(&mut self, logical_target: Point) -> Cursor {
self.cursor = Self::measure_forward( self.measure_forward(usize::MAX, logical_target, Point::MAX)
self.tab_size,
self.word_wrap_column,
usize::MAX,
logical_target,
Point::MAX,
self.cursor,
self.buffer,
);
self.cursor
} }
/// Navigates **forward** to the given visual position. /// Navigates **forward** to the given visual position.
@ -123,16 +124,7 @@ impl<'doc> MeasurementConfig<'doc> {
/// ///
/// The cursor position after the navigation. /// The cursor position after the navigation.
pub fn goto_visual(&mut self, visual_target: Point) -> Cursor { pub fn goto_visual(&mut self, visual_target: Point) -> Cursor {
self.cursor = Self::measure_forward( self.measure_forward(usize::MAX, Point::MAX, visual_target)
self.tab_size,
self.word_wrap_column,
usize::MAX,
Point::MAX,
visual_target,
self.cursor,
self.buffer,
);
self.cursor
} }
/// Returns the current cursor position. /// Returns the current cursor position.
@ -149,27 +141,24 @@ impl<'doc> MeasurementConfig<'doc> {
// the wrap exists on both lines and it'll default to wrapping. `goto_visual` however will always // the wrap exists on both lines and it'll default to wrapping. `goto_visual` however will always
// try to return a Y position that matches the requested position, so that Home/End works properly. // try to return a Y position that matches the requested position, so that Home/End works properly.
fn measure_forward( fn measure_forward(
tab_size: CoordType, &mut self,
word_wrap_column: CoordType,
offset_target: usize, offset_target: usize,
logical_target: Point, logical_target: Point,
visual_target: Point, visual_target: Point,
cursor: Cursor,
buffer: &dyn ReadableDocument,
) -> Cursor { ) -> Cursor {
if cursor.offset >= offset_target if self.cursor.offset >= offset_target
|| cursor.logical_pos >= logical_target || self.cursor.logical_pos >= logical_target
|| cursor.visual_pos >= visual_target || self.cursor.visual_pos >= visual_target
{ {
return cursor; return self.cursor;
} }
let mut offset = cursor.offset; let mut offset = self.cursor.offset;
let mut logical_pos_x = cursor.logical_pos.x; let mut logical_pos_x = self.cursor.logical_pos.x;
let mut logical_pos_y = cursor.logical_pos.y; let mut logical_pos_y = self.cursor.logical_pos.y;
let mut visual_pos_x = cursor.visual_pos.x; let mut visual_pos_x = self.cursor.visual_pos.x;
let mut visual_pos_y = cursor.visual_pos.y; let mut visual_pos_y = self.cursor.visual_pos.y;
let mut column = cursor.column; let mut column = self.cursor.column;
let mut logical_target_x = Self::calc_target_x(logical_target, logical_pos_y); let mut logical_target_x = Self::calc_target_x(logical_target, logical_pos_y);
let mut visual_target_x = Self::calc_target_x(visual_target, visual_pos_y); let mut visual_target_x = Self::calc_target_x(visual_target, visual_pos_y);
@ -177,7 +166,7 @@ impl<'doc> MeasurementConfig<'doc> {
// wrap_opp = Wrap Opportunity // wrap_opp = Wrap Opportunity
// These store the position and column of the last wrap opportunity. If `word_wrap_column` is // These store the position and column of the last wrap opportunity. If `word_wrap_column` is
// zero (word wrap disabled), all grapheme clusters are a wrap opportunity, because none are. // zero (word wrap disabled), all grapheme clusters are a wrap opportunity, because none are.
let mut wrap_opp = cursor.wrap_opp; let mut wrap_opp = self.cursor.wrap_opp;
let mut wrap_opp_offset = offset; let mut wrap_opp_offset = offset;
let mut wrap_opp_logical_pos_x = logical_pos_x; let mut wrap_opp_logical_pos_x = logical_pos_x;
let mut wrap_opp_visual_pos_x = visual_pos_x; let mut wrap_opp_visual_pos_x = visual_pos_x;
@ -209,7 +198,7 @@ impl<'doc> MeasurementConfig<'doc> {
loop { loop {
if !chunk_iter.has_next() { if !chunk_iter.has_next() {
cold_path(); cold_path();
chunk_iter = Utf8Chars::new(buffer.read_forward(chunk_range.end), 0); chunk_iter = Utf8Chars::new(self.buffer.read_forward(chunk_range.end), 0);
chunk_range = chunk_range.end..chunk_range.end + chunk_iter.len(); chunk_range = chunk_range.end..chunk_range.end + chunk_iter.len();
} }
@ -219,7 +208,8 @@ impl<'doc> MeasurementConfig<'doc> {
// Similar applies to the width. // Similar applies to the width.
props_last_char = props_next_cluster; props_last_char = props_next_cluster;
offset_next_cluster = chunk_range.start + chunk_iter.offset(); offset_next_cluster = chunk_range.start + chunk_iter.offset();
width += ucd_grapheme_cluster_character_width(props_next_cluster) as CoordType; width += ucd_grapheme_cluster_character_width(props_next_cluster, ambiguous_width())
as CoordType;
// The `Document::read_forward` interface promises us that it will not split // The `Document::read_forward` interface promises us that it will not split
// grapheme clusters across chunks. Therefore, we can safely break here. // grapheme clusters across chunks. Therefore, we can safely break here.
@ -252,10 +242,10 @@ impl<'doc> MeasurementConfig<'doc> {
// Tabs require special handling because they can have a variable width. // Tabs require special handling because they can have a variable width.
if props_last_char == ucd_tab_properties() { if props_last_char == ucd_tab_properties() {
// SAFETY: `tab_size` is clamped to >= 1 in `with_tab_size`. // SAFETY: `self.tab_size` is clamped to >= 1 in `with_tab_size`.
// This assert ensures that Rust doesn't insert panicking null checks. // This assert ensures that Rust doesn't insert panicking null checks.
unsafe { std::hint::assert_unchecked(tab_size >= 1) }; unsafe { std::hint::assert_unchecked(self.tab_size >= 1) };
width = tab_size - (column % tab_size); width = self.tab_size - (column % self.tab_size);
} }
// Hard wrap: Both the logical and visual position advance by one line. // Hard wrap: Both the logical and visual position advance by one line.
@ -290,7 +280,7 @@ impl<'doc> MeasurementConfig<'doc> {
// Since this code above may need to revert to a previous `wrap_opp_*`, // Since this code above may need to revert to a previous `wrap_opp_*`,
// it must be done before advancing / checking for `ucd_line_break_joins`. // it must be done before advancing / checking for `ucd_line_break_joins`.
if word_wrap_column > 0 && visual_pos_x + width > word_wrap_column { if self.word_wrap_column > 0 && visual_pos_x + width > self.word_wrap_column {
if !wrap_opp { if !wrap_opp {
// Otherwise, the lack of a wrap opportunity means that a single word // Otherwise, the lack of a wrap opportunity means that a single word
// is wider than the word wrap column. We need to force-break the word. // is wider than the word wrap column. We need to force-break the word.
@ -342,7 +332,7 @@ impl<'doc> MeasurementConfig<'doc> {
visual_pos_x += width; visual_pos_x += width;
column += width; column += width;
if word_wrap_column > 0 if self.word_wrap_column > 0
&& !ucd_line_break_joins(props_current_cluster, props_next_cluster) && !ucd_line_break_joins(props_current_cluster, props_next_cluster)
{ {
wrap_opp = true; wrap_opp = true;
@ -355,7 +345,7 @@ impl<'doc> MeasurementConfig<'doc> {
// If we're here, we hit our target. Now the only question is: // If we're here, we hit our target. Now the only question is:
// Is the word we're currently on so wide that it will be wrapped further down the document? // Is the word we're currently on so wide that it will be wrapped further down the document?
if word_wrap_column > 0 { if self.word_wrap_column > 0 {
if !wrap_opp { if !wrap_opp {
// If the current laid-out line had no wrap opportunities, it means we had an input // If the current laid-out line had no wrap opportunities, it means we had an input
// such as "fooooooooooooooooooooo" at a `word_wrap_column` of e.g. 10. The word // such as "fooooooooooooooooooooo" at a `word_wrap_column` of e.g. 10. The word
@ -386,7 +376,8 @@ impl<'doc> MeasurementConfig<'doc> {
loop { loop {
if !chunk_iter.has_next() { if !chunk_iter.has_next() {
cold_path(); cold_path();
chunk_iter = Utf8Chars::new(buffer.read_forward(chunk_range.end), 0); chunk_iter =
Utf8Chars::new(self.buffer.read_forward(chunk_range.end), 0);
chunk_range = chunk_range.end..chunk_range.end + chunk_iter.len(); chunk_range = chunk_range.end..chunk_range.end + chunk_iter.len();
} }
@ -396,8 +387,10 @@ impl<'doc> MeasurementConfig<'doc> {
// Similar applies to the width. // Similar applies to the width.
props_last_char = props_next_cluster; props_last_char = props_next_cluster;
offset_next_cluster = chunk_range.start + chunk_iter.offset(); offset_next_cluster = chunk_range.start + chunk_iter.offset();
width += width += ucd_grapheme_cluster_character_width(
ucd_grapheme_cluster_character_width(props_next_cluster) as CoordType; props_next_cluster,
ambiguous_width(),
) as CoordType;
// The `Document::read_forward` interface promises us that it will not split // The `Document::read_forward` interface promises us that it will not split
// grapheme clusters across chunks. Therefore, we can safely break here. // grapheme clusters across chunks. Therefore, we can safely break here.
@ -431,10 +424,10 @@ impl<'doc> MeasurementConfig<'doc> {
// Tabs require special handling because they can have a variable width. // Tabs require special handling because they can have a variable width.
if props_last_char == ucd_tab_properties() { if props_last_char == ucd_tab_properties() {
// SAFETY: `tab_size` is clamped to >= 1 in `with_tab_size`. // SAFETY: `self.tab_size` is clamped to >= 1 in `with_tab_size`.
// This assert ensures that Rust doesn't insert panicking null checks. // This assert ensures that Rust doesn't insert panicking null checks.
unsafe { std::hint::assert_unchecked(tab_size >= 1) }; unsafe { std::hint::assert_unchecked(self.tab_size >= 1) };
width = tab_size - (column % tab_size); width = self.tab_size - (column % self.tab_size);
} }
// Hard wrap: Both the logical and visual position advance by one line. // Hard wrap: Both the logical and visual position advance by one line.
@ -444,7 +437,7 @@ impl<'doc> MeasurementConfig<'doc> {
visual_pos_x_lookahead += width; visual_pos_x_lookahead += width;
if visual_pos_x_lookahead > word_wrap_column { if visual_pos_x_lookahead > self.word_wrap_column {
visual_pos_x -= wrap_opp_visual_pos_x; visual_pos_x -= wrap_opp_visual_pos_x;
visual_pos_y += 1; visual_pos_y += 1;
break; break;
@ -467,13 +460,12 @@ impl<'doc> MeasurementConfig<'doc> {
} }
} }
Cursor { self.cursor.offset = offset;
offset, self.cursor.logical_pos = Point { x: logical_pos_x, y: logical_pos_y };
logical_pos: Point { x: logical_pos_x, y: logical_pos_y }, self.cursor.visual_pos = Point { x: visual_pos_x, y: visual_pos_y };
visual_pos: Point { x: visual_pos_x, y: visual_pos_y }, self.cursor.column = column;
column, self.cursor.wrap_opp = wrap_opp;
wrap_opp, self.cursor
}
} }
#[inline] #[inline]

File diff suppressed because it is too large Load diff

View file

@ -247,12 +247,15 @@ Usage: grapheme-table-gen [options...] <ucd.nounihan.grouped.xml>
Expose tab and linefeed as grapheme cluster properties Expose tab and linefeed as grapheme cluster properties
--no-ambiguous Treat all ambiguous characters as narrow --no-ambiguous Treat all ambiguous characters as narrow
--line-breaks Store and expose line break information --line-breaks Store and expose line break information
Download ucd.nounihan.grouped.xml at:
https://www.unicode.org/Public/UCD/latest/ucdxml/ucd.nounihan.grouped.zip
"; ";
fn main() -> anyhow::Result<()> { fn main() -> anyhow::Result<()> {
let mut args = pico_args::Arguments::from_env(); let mut args = pico_args::Arguments::from_env();
if args.contains(["-h", "--help"]) { if args.contains(["-h", "--help"]) {
eprint!("{}", HELP); eprint!("{HELP}");
return Ok(()); return Ok(());
} }
@ -310,7 +313,7 @@ fn main() -> anyhow::Result<()> {
for s in &out.trie.stages { for s in &out.trie.stages {
actual = s.values[actual as usize + ((cp >> s.shift) & s.mask)]; actual = s.values[actual as usize + ((cp >> s.shift) & s.mask)];
} }
assert_eq!(expected.value(), actual, "trie sanity check failed for U+{:04X}", cp); assert_eq!(expected.value(), actual, "trie sanity check failed for U+{cp:04X}");
} }
for (cp, &expected) in out.ucd.values[..0x80].iter().enumerate() { for (cp, &expected) in out.ucd.values[..0x80].iter().enumerate() {
let last = out.trie.stages.last().unwrap(); let last = out.trie.stages.last().unwrap();
@ -318,8 +321,7 @@ fn main() -> anyhow::Result<()> {
assert_eq!( assert_eq!(
expected.value(), expected.value(),
actual, actual,
"trie sanity check failed for direct ASCII mapping of U+{:04X}", "trie sanity check failed for direct ASCII mapping of U+{cp:04X}"
cp
); );
} }
@ -372,7 +374,7 @@ fn generate_c(out: Output) -> String {
for table in &out.rules_gc { for table in &out.rules_gc {
buf.push_str(" {\n"); buf.push_str(" {\n");
for &r in table { for &r in table {
_ = writeln!(buf, " 0b{:032b},", r); _ = writeln!(buf, " 0b{r:032b},");
} }
buf.push_str(" },\n"); buf.push_str(" },\n");
} }
@ -443,14 +445,37 @@ fn generate_c(out: Output) -> String {
{{ {{
return state == 3; return state == 3;
}} }}
inline int ucd_grapheme_cluster_character_width(const int val)
{{
return val >> {1};
}}
", ",
out.ucd.packing.mask_cluster_break, out.ucd.packing.mask_cluster_break,
);
if out.arg_no_ambiguous {
_ = writedoc!(
buf,
"
inline int ucd_grapheme_cluster_character_width(const int val)
{{
return val >> {};
}}
",
out.ucd.packing.shift_character_width, out.ucd.packing.shift_character_width,
); );
} else {
_ = writedoc!(
buf,
"
inline int ucd_grapheme_cluster_character_width(const int val, int ambiguous_width)
{{
int w = val >> {};
if (w == 3) {{
w = ambiguous_width;
}}
return w;
}}
",
out.ucd.packing.shift_character_width,
);
}
if out.arg_line_breaks { if out.arg_line_breaks {
_ = writedoc!( _ = writedoc!(
@ -546,7 +571,7 @@ fn generate_rust(out: Output) -> String {
for table in &out.rules_gc { for table in &out.rules_gc {
buf.push_str(" [\n"); buf.push_str(" [\n");
for &r in table { for &r in table {
_ = writeln!(buf, " 0b{:032b},", r); _ = writeln!(buf, " 0b{r:032b},");
} }
buf.push_str(" ],\n"); buf.push_str(" ],\n");
} }
@ -622,14 +647,42 @@ fn generate_rust(out: Output) -> String {
pub fn ucd_grapheme_cluster_joins_done(state: u32) -> bool {{ pub fn ucd_grapheme_cluster_joins_done(state: u32) -> bool {{
state == 3 state == 3
}} }}
#[inline(always)]
pub fn ucd_grapheme_cluster_character_width(val: usize) -> usize {{
val >> {1}
}}
", ",
out.ucd.packing.mask_cluster_break, out.ucd.packing.mask_cluster_break,
);
if out.arg_no_ambiguous {
_ = writedoc!(
buf,
"
#[inline(always)]
pub fn ucd_grapheme_cluster_character_width(val: usize) -> usize {{
val >> {}
}}
",
out.ucd.packing.shift_character_width, out.ucd.packing.shift_character_width,
); );
} else {
// `cold_path()` ensures that LLVM emits a branch instead of a conditional move.
// This improves performance, as ambiguous characters are rare.
// `> 2` is used instead of `== 3`, because this way the compiler can immediately
// test whether `val > (2 << shift_character_width)` before shifting.
_ = writedoc!(
buf,
"
#[inline(always)]
pub fn ucd_grapheme_cluster_character_width(val: usize, ambiguous_width: usize) -> usize {{
let mut w = val >> {};
if w > 2 {{
cold_path();
w = ambiguous_width;
}}
w
}}
",
out.ucd.packing.shift_character_width,
);
}
if out.arg_line_breaks { if out.arg_line_breaks {
_ = writedoc!( _ = writedoc!(
@ -681,6 +734,17 @@ fn generate_rust(out: Output) -> String {
); );
} }
if !out.arg_no_ambiguous {
_ = writedoc!(
buf,
"
#[cold]
#[inline(always)]
fn cold_path() {{}}
"
);
}
buf.push_str("// END: Generated by grapheme-table-gen\n"); buf.push_str("// END: Generated by grapheme-table-gen\n");
buf buf
} }