mirror of
https://github.com/microsoft/edit.git
synced 2025-07-03 14:33:22 +00:00
Implement support for Ambiguous = Wide (#406)
Does what it says on the tin. It's just a lot in the can. Closes #115
This commit is contained in:
parent
db1e813603
commit
f8bea2be19
6 changed files with 1125 additions and 994 deletions
|
@ -199,6 +199,13 @@ impl DocumentManager {
|
||||||
Ok(self.list.front_mut().unwrap())
|
Ok(self.list.front_mut().unwrap())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn reflow_all(&self) {
|
||||||
|
for doc in &self.list {
|
||||||
|
let mut tb = doc.buffer.borrow_mut();
|
||||||
|
tb.reflow();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub fn open_for_reading(path: &Path) -> apperr::Result<File> {
|
pub fn open_for_reading(path: &Path) -> apperr::Result<File> {
|
||||||
File::open(path).map_err(apperr::Error::from)
|
File::open(path).map_err(apperr::Error::from)
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,12 +23,12 @@ use draw_menubar::*;
|
||||||
use draw_statusbar::*;
|
use draw_statusbar::*;
|
||||||
use edit::arena::{self, Arena, ArenaString, scratch_arena};
|
use edit::arena::{self, Arena, ArenaString, scratch_arena};
|
||||||
use edit::framebuffer::{self, IndexedColor};
|
use edit::framebuffer::{self, IndexedColor};
|
||||||
use edit::helpers::{KIBI, MEBI, MetricFormatter, Rect, Size};
|
use edit::helpers::{CoordType, KIBI, MEBI, MetricFormatter, Rect, Size};
|
||||||
use edit::input::{self, kbmod, vk};
|
use edit::input::{self, kbmod, vk};
|
||||||
use edit::oklab::oklab_blend;
|
use edit::oklab::oklab_blend;
|
||||||
use edit::tui::*;
|
use edit::tui::*;
|
||||||
use edit::vt::{self, Token};
|
use edit::vt::{self, Token};
|
||||||
use edit::{apperr, arena_format, base64, path, sys};
|
use edit::{apperr, arena_format, base64, path, sys, unicode};
|
||||||
use localization::*;
|
use localization::*;
|
||||||
use state::*;
|
use state::*;
|
||||||
|
|
||||||
|
@ -79,7 +79,7 @@ fn run() -> apperr::Result<()> {
|
||||||
let mut input_parser = input::Parser::new();
|
let mut input_parser = input::Parser::new();
|
||||||
let mut tui = Tui::new()?;
|
let mut tui = Tui::new()?;
|
||||||
|
|
||||||
let _restore = setup_terminal(&mut tui, &mut vt_parser);
|
let _restore = setup_terminal(&mut tui, &mut state, &mut vt_parser);
|
||||||
|
|
||||||
state.menubar_color_bg = oklab_blend(
|
state.menubar_color_bg = oklab_blend(
|
||||||
tui.indexed(IndexedColor::Background),
|
tui.indexed(IndexedColor::Background),
|
||||||
|
@ -502,7 +502,7 @@ impl Drop for RestoreModes {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn setup_terminal(tui: &mut Tui, vt_parser: &mut vt::Parser) -> RestoreModes {
|
fn setup_terminal(tui: &mut Tui, state: &mut State, vt_parser: &mut vt::Parser) -> RestoreModes {
|
||||||
sys::write_stdout(concat!(
|
sys::write_stdout(concat!(
|
||||||
// 1049: Alternative Screen Buffer
|
// 1049: Alternative Screen Buffer
|
||||||
// I put the ASB switch in the beginning, just in case the terminal performs
|
// I put the ASB switch in the beginning, just in case the terminal performs
|
||||||
|
@ -517,6 +517,12 @@ fn setup_terminal(tui: &mut Tui, vt_parser: &mut vt::Parser) -> RestoreModes {
|
||||||
"\x1b]4;8;?;9;?;10;?;11;?;12;?;13;?;14;?;15;?\x07",
|
"\x1b]4;8;?;9;?;10;?;11;?;12;?;13;?;14;?;15;?\x07",
|
||||||
// OSC 10 and 11 queries for the current foreground and background colors.
|
// OSC 10 and 11 queries for the current foreground and background colors.
|
||||||
"\x1b]10;?\x07\x1b]11;?\x07",
|
"\x1b]10;?\x07\x1b]11;?\x07",
|
||||||
|
// Test whether ambiguous width characters are two columns wide.
|
||||||
|
// We use "…", because it's the most common ambiguous width character we use,
|
||||||
|
// and the old Windows conhost doesn't actually use wcwidth, it measures the
|
||||||
|
// actual display width of the character and assigns it columns accordingly.
|
||||||
|
// We detect it by writing the character and asking for the cursor position.
|
||||||
|
"\r…\x1b[6n",
|
||||||
// CSI c reports the terminal capabilities.
|
// CSI c reports the terminal capabilities.
|
||||||
// It also helps us to detect the end of the responses, because not all
|
// It also helps us to detect the end of the responses, because not all
|
||||||
// terminals support the OSC queries, but all of them support CSI c.
|
// terminals support the OSC queries, but all of them support CSI c.
|
||||||
|
@ -527,6 +533,7 @@ fn setup_terminal(tui: &mut Tui, vt_parser: &mut vt::Parser) -> RestoreModes {
|
||||||
let mut osc_buffer = String::new();
|
let mut osc_buffer = String::new();
|
||||||
let mut indexed_colors = framebuffer::DEFAULT_THEME;
|
let mut indexed_colors = framebuffer::DEFAULT_THEME;
|
||||||
let mut color_responses = 0;
|
let mut color_responses = 0;
|
||||||
|
let mut ambiguous_width = 1;
|
||||||
|
|
||||||
while !done {
|
while !done {
|
||||||
let scratch = scratch_arena(None);
|
let scratch = scratch_arena(None);
|
||||||
|
@ -537,7 +544,12 @@ fn setup_terminal(tui: &mut Tui, vt_parser: &mut vt::Parser) -> RestoreModes {
|
||||||
let mut vt_stream = vt_parser.parse(&input);
|
let mut vt_stream = vt_parser.parse(&input);
|
||||||
while let Some(token) = vt_stream.next() {
|
while let Some(token) = vt_stream.next() {
|
||||||
match token {
|
match token {
|
||||||
Token::Csi(state) if state.final_byte == 'c' => done = true,
|
Token::Csi(csi) => match csi.final_byte {
|
||||||
|
'c' => done = true,
|
||||||
|
// CPR (Cursor Position Report) response.
|
||||||
|
'R' => ambiguous_width = csi.params[1] as CoordType - 1,
|
||||||
|
_ => {}
|
||||||
|
},
|
||||||
Token::Osc { mut data, partial } => {
|
Token::Osc { mut data, partial } => {
|
||||||
if partial {
|
if partial {
|
||||||
osc_buffer.push_str(data);
|
osc_buffer.push_str(data);
|
||||||
|
@ -594,6 +606,11 @@ fn setup_terminal(tui: &mut Tui, vt_parser: &mut vt::Parser) -> RestoreModes {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ambiguous_width == 2 {
|
||||||
|
unicode::setup_ambiguous_width(2);
|
||||||
|
state.documents.reflow_all();
|
||||||
|
}
|
||||||
|
|
||||||
if color_responses == indexed_colors.len() {
|
if color_responses == indexed_colors.len() {
|
||||||
tui.setup_indexed_colors(indexed_colors);
|
tui.setup_indexed_colors(indexed_colors);
|
||||||
}
|
}
|
||||||
|
|
|
@ -427,7 +427,7 @@ impl TextBuffer {
|
||||||
false
|
false
|
||||||
} else {
|
} else {
|
||||||
self.margin_enabled = enabled;
|
self.margin_enabled = enabled;
|
||||||
self.reflow(true);
|
self.reflow();
|
||||||
true
|
true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -482,7 +482,7 @@ impl TextBuffer {
|
||||||
false
|
false
|
||||||
} else {
|
} else {
|
||||||
self.width = width;
|
self.width = width;
|
||||||
self.reflow(true);
|
self.reflow();
|
||||||
true
|
true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -499,7 +499,7 @@ impl TextBuffer {
|
||||||
false
|
false
|
||||||
} else {
|
} else {
|
||||||
self.tab_size = width;
|
self.tab_size = width;
|
||||||
self.reflow(true);
|
self.reflow();
|
||||||
true
|
true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -524,7 +524,7 @@ impl TextBuffer {
|
||||||
self.ruler = column;
|
self.ruler = column;
|
||||||
}
|
}
|
||||||
|
|
||||||
fn reflow(&mut self, force: bool) {
|
pub fn reflow(&mut self) {
|
||||||
// +1 onto logical_lines, because line numbers are 1-based.
|
// +1 onto logical_lines, because line numbers are 1-based.
|
||||||
// +1 onto log10, because we want the digit width and not the actual log10.
|
// +1 onto log10, because we want the digit width and not the actual log10.
|
||||||
// +3 onto log10, because we append " | " to the line numbers to form the margin.
|
// +3 onto log10, because we append " | " to the line numbers to form the margin.
|
||||||
|
@ -536,25 +536,26 @@ impl TextBuffer {
|
||||||
|
|
||||||
let text_width = self.text_width();
|
let text_width = self.text_width();
|
||||||
// 2 columns are required, because otherwise wide glyphs wouldn't ever fit.
|
// 2 columns are required, because otherwise wide glyphs wouldn't ever fit.
|
||||||
let word_wrap_column =
|
self.word_wrap_column =
|
||||||
if self.word_wrap_enabled && text_width >= 2 { text_width } else { 0 };
|
if self.word_wrap_enabled && text_width >= 2 { text_width } else { 0 };
|
||||||
|
|
||||||
if force || self.word_wrap_column > word_wrap_column {
|
// Recalculate the cursor position.
|
||||||
self.word_wrap_column = word_wrap_column;
|
self.cursor = self.cursor_move_to_logical_internal(
|
||||||
|
if self.word_wrap_column > 0 {
|
||||||
if self.cursor.offset != 0 {
|
Default::default()
|
||||||
self.cursor = self
|
} else {
|
||||||
.cursor_move_to_logical_internal(Default::default(), self.cursor.logical_pos);
|
self.goto_line_start(self.cursor, self.cursor.logical_pos.y)
|
||||||
}
|
},
|
||||||
|
self.cursor.logical_pos,
|
||||||
|
);
|
||||||
|
|
||||||
// Recalculate the line statistics.
|
// Recalculate the line statistics.
|
||||||
if self.word_wrap_enabled {
|
if self.word_wrap_column > 0 {
|
||||||
let end = self.cursor_move_to_logical_internal(self.cursor, Point::MAX);
|
let end = self.cursor_move_to_logical_internal(self.cursor, Point::MAX);
|
||||||
self.stats.visual_lines = end.visual_pos.y + 1;
|
self.stats.visual_lines = end.visual_pos.y + 1;
|
||||||
} else {
|
} else {
|
||||||
self.stats.visual_lines = self.stats.logical_lines;
|
self.stats.visual_lines = self.stats.logical_lines;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
self.cursor_for_rendering = None;
|
self.cursor_for_rendering = None;
|
||||||
}
|
}
|
||||||
|
@ -583,7 +584,7 @@ impl TextBuffer {
|
||||||
self.set_selection(None);
|
self.set_selection(None);
|
||||||
self.search = None;
|
self.search = None;
|
||||||
self.mark_as_clean();
|
self.mark_as_clean();
|
||||||
self.reflow(true);
|
self.reflow();
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Copies the contents of the buffer into a string.
|
/// Copies the contents of the buffer into a string.
|
||||||
|
@ -2312,9 +2313,7 @@ impl TextBuffer {
|
||||||
}
|
}
|
||||||
|
|
||||||
self.search = None;
|
self.search = None;
|
||||||
|
self.cursor_for_rendering = None;
|
||||||
// Also takes care of clearing `cursor_for_rendering`.
|
|
||||||
self.reflow(false);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Undo the last edit operation.
|
/// Undo the last edit operation.
|
||||||
|
@ -2428,8 +2427,7 @@ impl TextBuffer {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Also takes care of clearing `cursor_for_rendering`.
|
self.cursor_for_rendering = None;
|
||||||
self.reflow(false);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// For interfacing with ICU.
|
/// For interfacing with ICU.
|
||||||
|
|
|
@ -9,6 +9,25 @@ use crate::document::ReadableDocument;
|
||||||
use crate::helpers::{CoordType, Point};
|
use crate::helpers::{CoordType, Point};
|
||||||
use crate::simd::{memchr2, memrchr2};
|
use crate::simd::{memchr2, memrchr2};
|
||||||
|
|
||||||
|
// On one hand it's disgusting that I wrote this as a global variable, but on the
|
||||||
|
// other hand, this isn't a public library API, and it makes the code a lot cleaner,
|
||||||
|
// because we don't need to inject this once-per-process value everywhere.
|
||||||
|
static mut AMBIGUOUS_WIDTH: usize = 1;
|
||||||
|
|
||||||
|
/// Sets the width of "ambiguous" width characters as per "UAX #11: East Asian Width".
|
||||||
|
///
|
||||||
|
/// Defaults to 1.
|
||||||
|
pub fn setup_ambiguous_width(ambiguous_width: CoordType) {
|
||||||
|
unsafe { AMBIGUOUS_WIDTH = ambiguous_width as usize };
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn ambiguous_width() -> usize {
|
||||||
|
// SAFETY: This is a global variable that is set once per process.
|
||||||
|
// It is never changed after that, so this is safe to call.
|
||||||
|
unsafe { AMBIGUOUS_WIDTH }
|
||||||
|
}
|
||||||
|
|
||||||
/// Stores a position inside a [`ReadableDocument`].
|
/// Stores a position inside a [`ReadableDocument`].
|
||||||
///
|
///
|
||||||
/// The cursor tracks both the absolute byte-offset,
|
/// The cursor tracks both the absolute byte-offset,
|
||||||
|
@ -40,16 +59,25 @@ pub struct Cursor {
|
||||||
/// Your entrypoint to navigating inside a [`ReadableDocument`].
|
/// Your entrypoint to navigating inside a [`ReadableDocument`].
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct MeasurementConfig<'doc> {
|
pub struct MeasurementConfig<'doc> {
|
||||||
buffer: &'doc dyn ReadableDocument,
|
cursor: Cursor,
|
||||||
tab_size: CoordType,
|
tab_size: CoordType,
|
||||||
word_wrap_column: CoordType,
|
word_wrap_column: CoordType,
|
||||||
cursor: Cursor,
|
buffer: &'doc dyn ReadableDocument,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'doc> MeasurementConfig<'doc> {
|
impl<'doc> MeasurementConfig<'doc> {
|
||||||
/// Creates a new [`MeasurementConfig`] for the given document.
|
/// Creates a new [`MeasurementConfig`] for the given document.
|
||||||
pub fn new(buffer: &'doc dyn ReadableDocument) -> Self {
|
pub fn new(buffer: &'doc dyn ReadableDocument) -> Self {
|
||||||
Self { buffer, tab_size: 8, word_wrap_column: 0, cursor: Default::default() }
|
Self { cursor: Default::default(), tab_size: 8, word_wrap_column: 0, buffer }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Sets the initial cursor to the given position.
|
||||||
|
///
|
||||||
|
/// WARNING: While the code doesn't panic if the cursor is invalid,
|
||||||
|
/// the results will obviously be complete garbage.
|
||||||
|
pub fn with_cursor(mut self, cursor: Cursor) -> Self {
|
||||||
|
self.cursor = cursor;
|
||||||
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Sets the tab size.
|
/// Sets the tab size.
|
||||||
|
@ -68,31 +96,13 @@ impl<'doc> MeasurementConfig<'doc> {
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Sets the initial cursor to the given position.
|
|
||||||
///
|
|
||||||
/// WARNING: While the code doesn't panic if the cursor is invalid,
|
|
||||||
/// the results will obviously be complete garbage.
|
|
||||||
pub fn with_cursor(mut self, cursor: Cursor) -> Self {
|
|
||||||
self.cursor = cursor;
|
|
||||||
self
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Navigates **forward** to the given absolute offset.
|
/// Navigates **forward** to the given absolute offset.
|
||||||
///
|
///
|
||||||
/// # Returns
|
/// # Returns
|
||||||
///
|
///
|
||||||
/// The cursor position after the navigation.
|
/// The cursor position after the navigation.
|
||||||
pub fn goto_offset(&mut self, offset: usize) -> Cursor {
|
pub fn goto_offset(&mut self, offset: usize) -> Cursor {
|
||||||
self.cursor = Self::measure_forward(
|
self.measure_forward(offset, Point::MAX, Point::MAX)
|
||||||
self.tab_size,
|
|
||||||
self.word_wrap_column,
|
|
||||||
offset,
|
|
||||||
Point::MAX,
|
|
||||||
Point::MAX,
|
|
||||||
self.cursor,
|
|
||||||
self.buffer,
|
|
||||||
);
|
|
||||||
self.cursor
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Navigates **forward** to the given logical position.
|
/// Navigates **forward** to the given logical position.
|
||||||
|
@ -103,16 +113,7 @@ impl<'doc> MeasurementConfig<'doc> {
|
||||||
///
|
///
|
||||||
/// The cursor position after the navigation.
|
/// The cursor position after the navigation.
|
||||||
pub fn goto_logical(&mut self, logical_target: Point) -> Cursor {
|
pub fn goto_logical(&mut self, logical_target: Point) -> Cursor {
|
||||||
self.cursor = Self::measure_forward(
|
self.measure_forward(usize::MAX, logical_target, Point::MAX)
|
||||||
self.tab_size,
|
|
||||||
self.word_wrap_column,
|
|
||||||
usize::MAX,
|
|
||||||
logical_target,
|
|
||||||
Point::MAX,
|
|
||||||
self.cursor,
|
|
||||||
self.buffer,
|
|
||||||
);
|
|
||||||
self.cursor
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Navigates **forward** to the given visual position.
|
/// Navigates **forward** to the given visual position.
|
||||||
|
@ -123,16 +124,7 @@ impl<'doc> MeasurementConfig<'doc> {
|
||||||
///
|
///
|
||||||
/// The cursor position after the navigation.
|
/// The cursor position after the navigation.
|
||||||
pub fn goto_visual(&mut self, visual_target: Point) -> Cursor {
|
pub fn goto_visual(&mut self, visual_target: Point) -> Cursor {
|
||||||
self.cursor = Self::measure_forward(
|
self.measure_forward(usize::MAX, Point::MAX, visual_target)
|
||||||
self.tab_size,
|
|
||||||
self.word_wrap_column,
|
|
||||||
usize::MAX,
|
|
||||||
Point::MAX,
|
|
||||||
visual_target,
|
|
||||||
self.cursor,
|
|
||||||
self.buffer,
|
|
||||||
);
|
|
||||||
self.cursor
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the current cursor position.
|
/// Returns the current cursor position.
|
||||||
|
@ -149,27 +141,24 @@ impl<'doc> MeasurementConfig<'doc> {
|
||||||
// the wrap exists on both lines and it'll default to wrapping. `goto_visual` however will always
|
// the wrap exists on both lines and it'll default to wrapping. `goto_visual` however will always
|
||||||
// try to return a Y position that matches the requested position, so that Home/End works properly.
|
// try to return a Y position that matches the requested position, so that Home/End works properly.
|
||||||
fn measure_forward(
|
fn measure_forward(
|
||||||
tab_size: CoordType,
|
&mut self,
|
||||||
word_wrap_column: CoordType,
|
|
||||||
offset_target: usize,
|
offset_target: usize,
|
||||||
logical_target: Point,
|
logical_target: Point,
|
||||||
visual_target: Point,
|
visual_target: Point,
|
||||||
cursor: Cursor,
|
|
||||||
buffer: &dyn ReadableDocument,
|
|
||||||
) -> Cursor {
|
) -> Cursor {
|
||||||
if cursor.offset >= offset_target
|
if self.cursor.offset >= offset_target
|
||||||
|| cursor.logical_pos >= logical_target
|
|| self.cursor.logical_pos >= logical_target
|
||||||
|| cursor.visual_pos >= visual_target
|
|| self.cursor.visual_pos >= visual_target
|
||||||
{
|
{
|
||||||
return cursor;
|
return self.cursor;
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut offset = cursor.offset;
|
let mut offset = self.cursor.offset;
|
||||||
let mut logical_pos_x = cursor.logical_pos.x;
|
let mut logical_pos_x = self.cursor.logical_pos.x;
|
||||||
let mut logical_pos_y = cursor.logical_pos.y;
|
let mut logical_pos_y = self.cursor.logical_pos.y;
|
||||||
let mut visual_pos_x = cursor.visual_pos.x;
|
let mut visual_pos_x = self.cursor.visual_pos.x;
|
||||||
let mut visual_pos_y = cursor.visual_pos.y;
|
let mut visual_pos_y = self.cursor.visual_pos.y;
|
||||||
let mut column = cursor.column;
|
let mut column = self.cursor.column;
|
||||||
|
|
||||||
let mut logical_target_x = Self::calc_target_x(logical_target, logical_pos_y);
|
let mut logical_target_x = Self::calc_target_x(logical_target, logical_pos_y);
|
||||||
let mut visual_target_x = Self::calc_target_x(visual_target, visual_pos_y);
|
let mut visual_target_x = Self::calc_target_x(visual_target, visual_pos_y);
|
||||||
|
@ -177,7 +166,7 @@ impl<'doc> MeasurementConfig<'doc> {
|
||||||
// wrap_opp = Wrap Opportunity
|
// wrap_opp = Wrap Opportunity
|
||||||
// These store the position and column of the last wrap opportunity. If `word_wrap_column` is
|
// These store the position and column of the last wrap opportunity. If `word_wrap_column` is
|
||||||
// zero (word wrap disabled), all grapheme clusters are a wrap opportunity, because none are.
|
// zero (word wrap disabled), all grapheme clusters are a wrap opportunity, because none are.
|
||||||
let mut wrap_opp = cursor.wrap_opp;
|
let mut wrap_opp = self.cursor.wrap_opp;
|
||||||
let mut wrap_opp_offset = offset;
|
let mut wrap_opp_offset = offset;
|
||||||
let mut wrap_opp_logical_pos_x = logical_pos_x;
|
let mut wrap_opp_logical_pos_x = logical_pos_x;
|
||||||
let mut wrap_opp_visual_pos_x = visual_pos_x;
|
let mut wrap_opp_visual_pos_x = visual_pos_x;
|
||||||
|
@ -209,7 +198,7 @@ impl<'doc> MeasurementConfig<'doc> {
|
||||||
loop {
|
loop {
|
||||||
if !chunk_iter.has_next() {
|
if !chunk_iter.has_next() {
|
||||||
cold_path();
|
cold_path();
|
||||||
chunk_iter = Utf8Chars::new(buffer.read_forward(chunk_range.end), 0);
|
chunk_iter = Utf8Chars::new(self.buffer.read_forward(chunk_range.end), 0);
|
||||||
chunk_range = chunk_range.end..chunk_range.end + chunk_iter.len();
|
chunk_range = chunk_range.end..chunk_range.end + chunk_iter.len();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -219,7 +208,8 @@ impl<'doc> MeasurementConfig<'doc> {
|
||||||
// Similar applies to the width.
|
// Similar applies to the width.
|
||||||
props_last_char = props_next_cluster;
|
props_last_char = props_next_cluster;
|
||||||
offset_next_cluster = chunk_range.start + chunk_iter.offset();
|
offset_next_cluster = chunk_range.start + chunk_iter.offset();
|
||||||
width += ucd_grapheme_cluster_character_width(props_next_cluster) as CoordType;
|
width += ucd_grapheme_cluster_character_width(props_next_cluster, ambiguous_width())
|
||||||
|
as CoordType;
|
||||||
|
|
||||||
// The `Document::read_forward` interface promises us that it will not split
|
// The `Document::read_forward` interface promises us that it will not split
|
||||||
// grapheme clusters across chunks. Therefore, we can safely break here.
|
// grapheme clusters across chunks. Therefore, we can safely break here.
|
||||||
|
@ -252,10 +242,10 @@ impl<'doc> MeasurementConfig<'doc> {
|
||||||
|
|
||||||
// Tabs require special handling because they can have a variable width.
|
// Tabs require special handling because they can have a variable width.
|
||||||
if props_last_char == ucd_tab_properties() {
|
if props_last_char == ucd_tab_properties() {
|
||||||
// SAFETY: `tab_size` is clamped to >= 1 in `with_tab_size`.
|
// SAFETY: `self.tab_size` is clamped to >= 1 in `with_tab_size`.
|
||||||
// This assert ensures that Rust doesn't insert panicking null checks.
|
// This assert ensures that Rust doesn't insert panicking null checks.
|
||||||
unsafe { std::hint::assert_unchecked(tab_size >= 1) };
|
unsafe { std::hint::assert_unchecked(self.tab_size >= 1) };
|
||||||
width = tab_size - (column % tab_size);
|
width = self.tab_size - (column % self.tab_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Hard wrap: Both the logical and visual position advance by one line.
|
// Hard wrap: Both the logical and visual position advance by one line.
|
||||||
|
@ -290,7 +280,7 @@ impl<'doc> MeasurementConfig<'doc> {
|
||||||
|
|
||||||
// Since this code above may need to revert to a previous `wrap_opp_*`,
|
// Since this code above may need to revert to a previous `wrap_opp_*`,
|
||||||
// it must be done before advancing / checking for `ucd_line_break_joins`.
|
// it must be done before advancing / checking for `ucd_line_break_joins`.
|
||||||
if word_wrap_column > 0 && visual_pos_x + width > word_wrap_column {
|
if self.word_wrap_column > 0 && visual_pos_x + width > self.word_wrap_column {
|
||||||
if !wrap_opp {
|
if !wrap_opp {
|
||||||
// Otherwise, the lack of a wrap opportunity means that a single word
|
// Otherwise, the lack of a wrap opportunity means that a single word
|
||||||
// is wider than the word wrap column. We need to force-break the word.
|
// is wider than the word wrap column. We need to force-break the word.
|
||||||
|
@ -342,7 +332,7 @@ impl<'doc> MeasurementConfig<'doc> {
|
||||||
visual_pos_x += width;
|
visual_pos_x += width;
|
||||||
column += width;
|
column += width;
|
||||||
|
|
||||||
if word_wrap_column > 0
|
if self.word_wrap_column > 0
|
||||||
&& !ucd_line_break_joins(props_current_cluster, props_next_cluster)
|
&& !ucd_line_break_joins(props_current_cluster, props_next_cluster)
|
||||||
{
|
{
|
||||||
wrap_opp = true;
|
wrap_opp = true;
|
||||||
|
@ -355,7 +345,7 @@ impl<'doc> MeasurementConfig<'doc> {
|
||||||
|
|
||||||
// If we're here, we hit our target. Now the only question is:
|
// If we're here, we hit our target. Now the only question is:
|
||||||
// Is the word we're currently on so wide that it will be wrapped further down the document?
|
// Is the word we're currently on so wide that it will be wrapped further down the document?
|
||||||
if word_wrap_column > 0 {
|
if self.word_wrap_column > 0 {
|
||||||
if !wrap_opp {
|
if !wrap_opp {
|
||||||
// If the current laid-out line had no wrap opportunities, it means we had an input
|
// If the current laid-out line had no wrap opportunities, it means we had an input
|
||||||
// such as "fooooooooooooooooooooo" at a `word_wrap_column` of e.g. 10. The word
|
// such as "fooooooooooooooooooooo" at a `word_wrap_column` of e.g. 10. The word
|
||||||
|
@ -386,7 +376,8 @@ impl<'doc> MeasurementConfig<'doc> {
|
||||||
loop {
|
loop {
|
||||||
if !chunk_iter.has_next() {
|
if !chunk_iter.has_next() {
|
||||||
cold_path();
|
cold_path();
|
||||||
chunk_iter = Utf8Chars::new(buffer.read_forward(chunk_range.end), 0);
|
chunk_iter =
|
||||||
|
Utf8Chars::new(self.buffer.read_forward(chunk_range.end), 0);
|
||||||
chunk_range = chunk_range.end..chunk_range.end + chunk_iter.len();
|
chunk_range = chunk_range.end..chunk_range.end + chunk_iter.len();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -396,8 +387,10 @@ impl<'doc> MeasurementConfig<'doc> {
|
||||||
// Similar applies to the width.
|
// Similar applies to the width.
|
||||||
props_last_char = props_next_cluster;
|
props_last_char = props_next_cluster;
|
||||||
offset_next_cluster = chunk_range.start + chunk_iter.offset();
|
offset_next_cluster = chunk_range.start + chunk_iter.offset();
|
||||||
width +=
|
width += ucd_grapheme_cluster_character_width(
|
||||||
ucd_grapheme_cluster_character_width(props_next_cluster) as CoordType;
|
props_next_cluster,
|
||||||
|
ambiguous_width(),
|
||||||
|
) as CoordType;
|
||||||
|
|
||||||
// The `Document::read_forward` interface promises us that it will not split
|
// The `Document::read_forward` interface promises us that it will not split
|
||||||
// grapheme clusters across chunks. Therefore, we can safely break here.
|
// grapheme clusters across chunks. Therefore, we can safely break here.
|
||||||
|
@ -431,10 +424,10 @@ impl<'doc> MeasurementConfig<'doc> {
|
||||||
|
|
||||||
// Tabs require special handling because they can have a variable width.
|
// Tabs require special handling because they can have a variable width.
|
||||||
if props_last_char == ucd_tab_properties() {
|
if props_last_char == ucd_tab_properties() {
|
||||||
// SAFETY: `tab_size` is clamped to >= 1 in `with_tab_size`.
|
// SAFETY: `self.tab_size` is clamped to >= 1 in `with_tab_size`.
|
||||||
// This assert ensures that Rust doesn't insert panicking null checks.
|
// This assert ensures that Rust doesn't insert panicking null checks.
|
||||||
unsafe { std::hint::assert_unchecked(tab_size >= 1) };
|
unsafe { std::hint::assert_unchecked(self.tab_size >= 1) };
|
||||||
width = tab_size - (column % tab_size);
|
width = self.tab_size - (column % self.tab_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Hard wrap: Both the logical and visual position advance by one line.
|
// Hard wrap: Both the logical and visual position advance by one line.
|
||||||
|
@ -444,7 +437,7 @@ impl<'doc> MeasurementConfig<'doc> {
|
||||||
|
|
||||||
visual_pos_x_lookahead += width;
|
visual_pos_x_lookahead += width;
|
||||||
|
|
||||||
if visual_pos_x_lookahead > word_wrap_column {
|
if visual_pos_x_lookahead > self.word_wrap_column {
|
||||||
visual_pos_x -= wrap_opp_visual_pos_x;
|
visual_pos_x -= wrap_opp_visual_pos_x;
|
||||||
visual_pos_y += 1;
|
visual_pos_y += 1;
|
||||||
break;
|
break;
|
||||||
|
@ -467,13 +460,12 @@ impl<'doc> MeasurementConfig<'doc> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Cursor {
|
self.cursor.offset = offset;
|
||||||
offset,
|
self.cursor.logical_pos = Point { x: logical_pos_x, y: logical_pos_y };
|
||||||
logical_pos: Point { x: logical_pos_x, y: logical_pos_y },
|
self.cursor.visual_pos = Point { x: visual_pos_x, y: visual_pos_y };
|
||||||
visual_pos: Point { x: visual_pos_x, y: visual_pos_y },
|
self.cursor.column = column;
|
||||||
column,
|
self.cursor.wrap_opp = wrap_opp;
|
||||||
wrap_opp,
|
self.cursor
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -247,12 +247,15 @@ Usage: grapheme-table-gen [options...] <ucd.nounihan.grouped.xml>
|
||||||
Expose tab and linefeed as grapheme cluster properties
|
Expose tab and linefeed as grapheme cluster properties
|
||||||
--no-ambiguous Treat all ambiguous characters as narrow
|
--no-ambiguous Treat all ambiguous characters as narrow
|
||||||
--line-breaks Store and expose line break information
|
--line-breaks Store and expose line break information
|
||||||
|
|
||||||
|
Download ucd.nounihan.grouped.xml at:
|
||||||
|
https://www.unicode.org/Public/UCD/latest/ucdxml/ucd.nounihan.grouped.zip
|
||||||
";
|
";
|
||||||
|
|
||||||
fn main() -> anyhow::Result<()> {
|
fn main() -> anyhow::Result<()> {
|
||||||
let mut args = pico_args::Arguments::from_env();
|
let mut args = pico_args::Arguments::from_env();
|
||||||
if args.contains(["-h", "--help"]) {
|
if args.contains(["-h", "--help"]) {
|
||||||
eprint!("{}", HELP);
|
eprint!("{HELP}");
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -310,7 +313,7 @@ fn main() -> anyhow::Result<()> {
|
||||||
for s in &out.trie.stages {
|
for s in &out.trie.stages {
|
||||||
actual = s.values[actual as usize + ((cp >> s.shift) & s.mask)];
|
actual = s.values[actual as usize + ((cp >> s.shift) & s.mask)];
|
||||||
}
|
}
|
||||||
assert_eq!(expected.value(), actual, "trie sanity check failed for U+{:04X}", cp);
|
assert_eq!(expected.value(), actual, "trie sanity check failed for U+{cp:04X}");
|
||||||
}
|
}
|
||||||
for (cp, &expected) in out.ucd.values[..0x80].iter().enumerate() {
|
for (cp, &expected) in out.ucd.values[..0x80].iter().enumerate() {
|
||||||
let last = out.trie.stages.last().unwrap();
|
let last = out.trie.stages.last().unwrap();
|
||||||
|
@ -318,8 +321,7 @@ fn main() -> anyhow::Result<()> {
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
expected.value(),
|
expected.value(),
|
||||||
actual,
|
actual,
|
||||||
"trie sanity check failed for direct ASCII mapping of U+{:04X}",
|
"trie sanity check failed for direct ASCII mapping of U+{cp:04X}"
|
||||||
cp
|
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -372,7 +374,7 @@ fn generate_c(out: Output) -> String {
|
||||||
for table in &out.rules_gc {
|
for table in &out.rules_gc {
|
||||||
buf.push_str(" {\n");
|
buf.push_str(" {\n");
|
||||||
for &r in table {
|
for &r in table {
|
||||||
_ = writeln!(buf, " 0b{:032b},", r);
|
_ = writeln!(buf, " 0b{r:032b},");
|
||||||
}
|
}
|
||||||
buf.push_str(" },\n");
|
buf.push_str(" },\n");
|
||||||
}
|
}
|
||||||
|
@ -443,14 +445,37 @@ fn generate_c(out: Output) -> String {
|
||||||
{{
|
{{
|
||||||
return state == 3;
|
return state == 3;
|
||||||
}}
|
}}
|
||||||
inline int ucd_grapheme_cluster_character_width(const int val)
|
|
||||||
{{
|
|
||||||
return val >> {1};
|
|
||||||
}}
|
|
||||||
",
|
",
|
||||||
out.ucd.packing.mask_cluster_break,
|
out.ucd.packing.mask_cluster_break,
|
||||||
|
);
|
||||||
|
|
||||||
|
if out.arg_no_ambiguous {
|
||||||
|
_ = writedoc!(
|
||||||
|
buf,
|
||||||
|
"
|
||||||
|
inline int ucd_grapheme_cluster_character_width(const int val)
|
||||||
|
{{
|
||||||
|
return val >> {};
|
||||||
|
}}
|
||||||
|
",
|
||||||
out.ucd.packing.shift_character_width,
|
out.ucd.packing.shift_character_width,
|
||||||
);
|
);
|
||||||
|
} else {
|
||||||
|
_ = writedoc!(
|
||||||
|
buf,
|
||||||
|
"
|
||||||
|
inline int ucd_grapheme_cluster_character_width(const int val, int ambiguous_width)
|
||||||
|
{{
|
||||||
|
int w = val >> {};
|
||||||
|
if (w == 3) {{
|
||||||
|
w = ambiguous_width;
|
||||||
|
}}
|
||||||
|
return w;
|
||||||
|
}}
|
||||||
|
",
|
||||||
|
out.ucd.packing.shift_character_width,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
if out.arg_line_breaks {
|
if out.arg_line_breaks {
|
||||||
_ = writedoc!(
|
_ = writedoc!(
|
||||||
|
@ -546,7 +571,7 @@ fn generate_rust(out: Output) -> String {
|
||||||
for table in &out.rules_gc {
|
for table in &out.rules_gc {
|
||||||
buf.push_str(" [\n");
|
buf.push_str(" [\n");
|
||||||
for &r in table {
|
for &r in table {
|
||||||
_ = writeln!(buf, " 0b{:032b},", r);
|
_ = writeln!(buf, " 0b{r:032b},");
|
||||||
}
|
}
|
||||||
buf.push_str(" ],\n");
|
buf.push_str(" ],\n");
|
||||||
}
|
}
|
||||||
|
@ -622,14 +647,42 @@ fn generate_rust(out: Output) -> String {
|
||||||
pub fn ucd_grapheme_cluster_joins_done(state: u32) -> bool {{
|
pub fn ucd_grapheme_cluster_joins_done(state: u32) -> bool {{
|
||||||
state == 3
|
state == 3
|
||||||
}}
|
}}
|
||||||
#[inline(always)]
|
|
||||||
pub fn ucd_grapheme_cluster_character_width(val: usize) -> usize {{
|
|
||||||
val >> {1}
|
|
||||||
}}
|
|
||||||
",
|
",
|
||||||
out.ucd.packing.mask_cluster_break,
|
out.ucd.packing.mask_cluster_break,
|
||||||
|
);
|
||||||
|
|
||||||
|
if out.arg_no_ambiguous {
|
||||||
|
_ = writedoc!(
|
||||||
|
buf,
|
||||||
|
"
|
||||||
|
#[inline(always)]
|
||||||
|
pub fn ucd_grapheme_cluster_character_width(val: usize) -> usize {{
|
||||||
|
val >> {}
|
||||||
|
}}
|
||||||
|
",
|
||||||
out.ucd.packing.shift_character_width,
|
out.ucd.packing.shift_character_width,
|
||||||
);
|
);
|
||||||
|
} else {
|
||||||
|
// `cold_path()` ensures that LLVM emits a branch instead of a conditional move.
|
||||||
|
// This improves performance, as ambiguous characters are rare.
|
||||||
|
// `> 2` is used instead of `== 3`, because this way the compiler can immediately
|
||||||
|
// test whether `val > (2 << shift_character_width)` before shifting.
|
||||||
|
_ = writedoc!(
|
||||||
|
buf,
|
||||||
|
"
|
||||||
|
#[inline(always)]
|
||||||
|
pub fn ucd_grapheme_cluster_character_width(val: usize, ambiguous_width: usize) -> usize {{
|
||||||
|
let mut w = val >> {};
|
||||||
|
if w > 2 {{
|
||||||
|
cold_path();
|
||||||
|
w = ambiguous_width;
|
||||||
|
}}
|
||||||
|
w
|
||||||
|
}}
|
||||||
|
",
|
||||||
|
out.ucd.packing.shift_character_width,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
if out.arg_line_breaks {
|
if out.arg_line_breaks {
|
||||||
_ = writedoc!(
|
_ = writedoc!(
|
||||||
|
@ -681,6 +734,17 @@ fn generate_rust(out: Output) -> String {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if !out.arg_no_ambiguous {
|
||||||
|
_ = writedoc!(
|
||||||
|
buf,
|
||||||
|
"
|
||||||
|
#[cold]
|
||||||
|
#[inline(always)]
|
||||||
|
fn cold_path() {{}}
|
||||||
|
"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
buf.push_str("// END: Generated by grapheme-table-gen\n");
|
buf.push_str("// END: Generated by grapheme-table-gen\n");
|
||||||
buf
|
buf
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue