mirror of
https://github.com/microsoft/edit.git
synced 2025-07-03 22:43:22 +00:00
Some minor framebuffer optimizations & comments
This commit is contained in:
parent
cee02c45b1
commit
2cdbf773aa
4 changed files with 83 additions and 34 deletions
|
@ -1,7 +1,7 @@
|
|||
use criterion::{Criterion, Throughput, criterion_group, criterion_main};
|
||||
use criterion::{BenchmarkId, Criterion, Throughput, criterion_group, criterion_main};
|
||||
use edit::helpers::*;
|
||||
use edit::ucd::MeasurementConfig;
|
||||
use std::hint::black_box;
|
||||
use edit::memchr;
|
||||
use edit::ucd;
|
||||
|
||||
fn bench(c: &mut Criterion) {
|
||||
let reference = concat!(
|
||||
|
@ -13,21 +13,31 @@ fn bench(c: &mut Criterion) {
|
|||
let buffer = reference.repeat(10);
|
||||
let bytes = buffer.as_bytes();
|
||||
|
||||
let mut group = c.benchmark_group("ucd");
|
||||
let mut group = c.benchmark_group("ucd::MeasurementConfig::goto_logical");
|
||||
group.throughput(Throughput::Bytes(bytes.len() as u64));
|
||||
group.bench_function("MeasurementConfig::goto_logical", |b| {
|
||||
b.iter(|| black_box(MeasurementConfig::new(&bytes).goto_logical(Point::MAX)))
|
||||
group.bench_function("basic", |b| {
|
||||
b.iter(|| ucd::MeasurementConfig::new(&bytes).goto_logical(Point::MAX))
|
||||
});
|
||||
group.bench_function("MeasurementConfig::goto_logical with word wrap", |b| {
|
||||
group.bench_function("word_wrap", |b| {
|
||||
b.iter(|| {
|
||||
black_box(
|
||||
MeasurementConfig::new(&bytes)
|
||||
ucd::MeasurementConfig::new(&bytes)
|
||||
.with_word_wrap_column(50)
|
||||
.goto_logical(Point::MAX),
|
||||
)
|
||||
.goto_logical(Point::MAX)
|
||||
})
|
||||
});
|
||||
group.finish();
|
||||
|
||||
let mut group = c.benchmark_group("memchr::memchr2");
|
||||
let mut buffer = [0u8; 8192];
|
||||
for &size in &[0usize, 8, 64, 4096] {
|
||||
group.throughput(Throughput::Bytes(size as u64 + 1));
|
||||
group.bench_with_input(BenchmarkId::from_parameter(size), &size, |b, &size| {
|
||||
buffer.fill(b'a');
|
||||
buffer[size] = b'\n';
|
||||
b.iter(|| memchr::memchr2(b'\n', b'\r', &buffer[..size], 0));
|
||||
});
|
||||
}
|
||||
group.finish();
|
||||
}
|
||||
|
||||
criterion_group!(benches, bench);
|
||||
|
|
|
@ -632,34 +632,67 @@ impl Bitmap {
|
|||
}
|
||||
|
||||
fn alpha_blend(dst: u32, src: u32) -> u32 {
|
||||
let src_r = Self::srgb_to_linear(src & 0xff);
|
||||
let src_g = Self::srgb_to_linear((src >> 8) & 0xff);
|
||||
let src_b = Self::srgb_to_linear((src >> 16) & 0xff);
|
||||
let src_a = (src >> 24) as f32 / 255.0f32;
|
||||
let src_a = (src >> 24) as f32 * (1.0 / 255.0);
|
||||
let src_b = Self::srgb_to_linear(src >> 16);
|
||||
let src_g = Self::srgb_to_linear(src >> 8);
|
||||
let src_r = Self::srgb_to_linear(src);
|
||||
|
||||
let dst_r = Self::srgb_to_linear(dst & 0xff);
|
||||
let dst_g = Self::srgb_to_linear((dst >> 8) & 0xff);
|
||||
let dst_b = Self::srgb_to_linear((dst >> 16) & 0xff);
|
||||
let dst_a = (dst >> 24) as f32 / 255.0f32;
|
||||
let dst_a = (dst >> 24) as f32 * (1.0 / 255.0);
|
||||
let dst_b = Self::srgb_to_linear(dst >> 16);
|
||||
let dst_g = Self::srgb_to_linear(dst >> 8);
|
||||
let dst_r = Self::srgb_to_linear(dst);
|
||||
|
||||
let out_a = src_a + dst_a * (1.0f32 - src_a);
|
||||
let out_r = (src_r * src_a + dst_r * dst_a * (1.0f32 - src_a)) / out_a;
|
||||
let out_g = (src_g * src_a + dst_g * dst_a * (1.0f32 - src_a)) / out_a;
|
||||
let out_b = (src_b * src_a + dst_b * dst_a * (1.0f32 - src_a)) / out_a;
|
||||
let out_a = src_a + dst_a * (1.0 - src_a);
|
||||
// The formula is technically:
|
||||
// (src_bgr * src_a + dst_bgr * dst_a * (1 - src_a)) / out_a
|
||||
// but we can merge the division of out_a with the two preceding terms.
|
||||
// This saves us a bunch of operations that cannot be optimized away otherwise.
|
||||
let out_a_inv = 1.0 / out_a;
|
||||
let src_mul = src_a * out_a_inv;
|
||||
let dst_mul = dst_a * (1.0 - src_a) * out_a_inv;
|
||||
let out_b = src_b * src_mul + dst_b * dst_mul;
|
||||
let out_g = src_g * src_mul + dst_g * dst_mul;
|
||||
let out_r = src_r * src_mul + dst_r * dst_mul;
|
||||
|
||||
(((out_a * 255.0f32) as u32) << 24)
|
||||
| (Self::linear_to_srgb(out_b) << 16)
|
||||
| (Self::linear_to_srgb(out_g) << 8)
|
||||
| Self::linear_to_srgb(out_r)
|
||||
let out_b = Self::linear_to_srgb(out_b);
|
||||
let out_g = Self::linear_to_srgb(out_g);
|
||||
let out_r = Self::linear_to_srgb(out_r);
|
||||
|
||||
(((out_a * 255.0f32) as u32) << 24) | (out_b << 16) | (out_g << 8) | out_r
|
||||
}
|
||||
|
||||
fn srgb_to_linear(c: u32) -> f32 {
|
||||
let fc = c as f32 / 255.0f32;
|
||||
if fc <= 0.04045f32 {
|
||||
fc / 12.92f32
|
||||
} else {
|
||||
((fc + 0.055f32) / 1.055f32).powf(2.4f32)
|
||||
}
|
||||
// Generated using:
|
||||
// ```rs
|
||||
// let fc = c as f32 / 255.0;
|
||||
// if fc <= 0.04045 {
|
||||
// fc / 12.92
|
||||
// } else {
|
||||
// ((fc + 0.055) / 1.055).powf(2.4)
|
||||
// }
|
||||
// ```
|
||||
// I'd love to use hex floats, but for some reason Rust maintainers decided against it...
|
||||
#[rustfmt::skip]
|
||||
#[allow(clippy::excessive_precision)]
|
||||
const LUT: [f32; 256] = [
|
||||
0.0000000000, 0.0003035270, 0.0006070540, 0.0009105810, 0.0012141080, 0.0015176350, 0.0018211619, 0.0021246888, 0.0024282159, 0.0027317430, 0.0030352699, 0.0033465356, 0.0036765069, 0.0040247170, 0.0043914421, 0.0047769533,
|
||||
0.0051815170, 0.0056053917, 0.0060488326, 0.0065120910, 0.0069954102, 0.0074990317, 0.0080231922, 0.0085681248, 0.0091340570, 0.0097212177, 0.0103298230, 0.0109600937, 0.0116122449, 0.0122864870, 0.0129830306, 0.0137020806,
|
||||
0.0144438436, 0.0152085144, 0.0159962922, 0.0168073755, 0.0176419523, 0.0185002182, 0.0193823613, 0.0202885624, 0.0212190095, 0.0221738834, 0.0231533647, 0.0241576303, 0.0251868572, 0.0262412224, 0.0273208916, 0.0284260381,
|
||||
0.0295568332, 0.0307134409, 0.0318960287, 0.0331047624, 0.0343398079, 0.0356013142, 0.0368894450, 0.0382043645, 0.0395462364, 0.0409151986, 0.0423114114, 0.0437350273, 0.0451862030, 0.0466650836, 0.0481718220, 0.0497065634,
|
||||
0.0512694679, 0.0528606549, 0.0544802807, 0.0561284944, 0.0578054339, 0.0595112406, 0.0612460710, 0.0630100295, 0.0648032799, 0.0666259527, 0.0684781820, 0.0703601092, 0.0722718611, 0.0742135793, 0.0761853904, 0.0781874284,
|
||||
0.0802198276, 0.0822827145, 0.0843762159, 0.0865004659, 0.0886556059, 0.0908417329, 0.0930589810, 0.0953074843, 0.0975873619, 0.0998987406, 0.1022417471, 0.1046164930, 0.1070231125, 0.1094617173, 0.1119324341, 0.1144353822,
|
||||
0.1169706732, 0.1195384338, 0.1221387982, 0.1247718409, 0.1274376959, 0.1301364899, 0.1328683347, 0.1356333494, 0.1384316236, 0.1412633061, 0.1441284865, 0.1470272839, 0.1499598026, 0.1529261619, 0.1559264660, 0.1589608639,
|
||||
0.1620294005, 0.1651322246, 0.1682693958, 0.1714410931, 0.1746473908, 0.1778884083, 0.1811642349, 0.1844749898, 0.1878207624, 0.1912016720, 0.1946178079, 0.1980693042, 0.2015562356, 0.2050787061, 0.2086368501, 0.2122307271,
|
||||
0.2158605307, 0.2195262313, 0.2232279778, 0.2269658893, 0.2307400703, 0.2345506549, 0.2383976579, 0.2422811985, 0.2462013960, 0.2501583695, 0.2541521788, 0.2581829131, 0.2622507215, 0.2663556635, 0.2704978585, 0.2746773660,
|
||||
0.2788943350, 0.2831487954, 0.2874408960, 0.2917706966, 0.2961383164, 0.3005438447, 0.3049873710, 0.3094689548, 0.3139887452, 0.3185468316, 0.3231432438, 0.3277781308, 0.3324515820, 0.3371636569, 0.3419144452, 0.3467040956,
|
||||
0.3515326977, 0.3564002514, 0.3613068759, 0.3662526906, 0.3712377846, 0.3762622178, 0.3813261092, 0.3864295185, 0.3915725648, 0.3967553079, 0.4019778669, 0.4072403014, 0.4125427008, 0.4178851545, 0.4232677519, 0.4286905527,
|
||||
0.4341537058, 0.4396572411, 0.4452012479, 0.4507858455, 0.4564110637, 0.4620770514, 0.4677838385, 0.4735315442, 0.4793202281, 0.4851499796, 0.4910208881, 0.4969330430, 0.5028865933, 0.5088814497, 0.5149177909, 0.5209956765,
|
||||
0.5271152258, 0.5332764983, 0.5394796133, 0.5457245708, 0.5520114899, 0.5583404899, 0.5647116303, 0.5711249113, 0.5775805116, 0.5840784907, 0.5906189084, 0.5972018838, 0.6038274169, 0.6104956269, 0.6172066331, 0.6239604354,
|
||||
0.6307572126, 0.6375969648, 0.6444797516, 0.6514056921, 0.6583748460, 0.6653873324, 0.6724432111, 0.6795425415, 0.6866854429, 0.6938719153, 0.7011020184, 0.7083759308, 0.7156936526, 0.7230552435, 0.7304608822, 0.7379105687,
|
||||
0.7454043627, 0.7529423237, 0.7605246305, 0.7681512833, 0.7758223414, 0.7835379243, 0.7912980318, 0.7991028428, 0.8069523573, 0.8148466945, 0.8227858543, 0.8307699561, 0.8387991190, 0.8468732834, 0.8549926877, 0.8631572723,
|
||||
0.8713672161, 0.8796223402, 0.8879231811, 0.8962693810, 0.9046613574, 0.9130986929, 0.9215820432, 0.9301108718, 0.9386858940, 0.9473065734, 0.9559735060, 0.9646862745, 0.9734454751, 0.9822505713, 0.9911022186, 1.0000000000,
|
||||
];
|
||||
LUT[(c & 0xff) as usize]
|
||||
}
|
||||
|
||||
fn linear_to_srgb(c: f32) -> u32 {
|
||||
|
|
|
@ -483,6 +483,7 @@ impl<'doc> MeasurementConfig<'doc> {
|
|||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn calc_target_x(target: Point, pos_y: CoordType) -> CoordType {
|
||||
match pos_y.cmp(&target.y) {
|
||||
std::cmp::Ordering::Less => CoordType::MAX,
|
||||
|
|
|
@ -35,6 +35,8 @@ impl<'a> Utf8Chars<'a> {
|
|||
self.offset < self.source.len()
|
||||
}
|
||||
|
||||
// I found that on mixed 50/50 English/Non-English text,
|
||||
// performance actually suffers when this gets inlined.
|
||||
#[cold]
|
||||
fn next_slow(&mut self, c: u8) -> char {
|
||||
// See: https://datatracker.ietf.org/doc/html/rfc3629
|
||||
|
@ -197,6 +199,9 @@ impl<'a> Utf8Chars<'a> {
|
|||
impl Iterator for Utf8Chars<'_> {
|
||||
type Item = char;
|
||||
|
||||
// At opt-level="s", this function doesn't get inlined,
|
||||
// but performance greatly suffers in that case.
|
||||
#[inline]
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
if self.offset >= self.source.len() {
|
||||
return None;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue