Some minor framebuffer optimizations & comments

This commit is contained in:
Leonard Hecker 2025-04-09 16:55:13 +02:00
parent cee02c45b1
commit 2cdbf773aa
4 changed files with 83 additions and 34 deletions

View file

@ -1,7 +1,7 @@
use criterion::{Criterion, Throughput, criterion_group, criterion_main};
use criterion::{BenchmarkId, Criterion, Throughput, criterion_group, criterion_main};
use edit::helpers::*;
use edit::ucd::MeasurementConfig;
use std::hint::black_box;
use edit::memchr;
use edit::ucd;
fn bench(c: &mut Criterion) {
let reference = concat!(
@ -13,21 +13,31 @@ fn bench(c: &mut Criterion) {
let buffer = reference.repeat(10);
let bytes = buffer.as_bytes();
let mut group = c.benchmark_group("ucd");
let mut group = c.benchmark_group("ucd::MeasurementConfig::goto_logical");
group.throughput(Throughput::Bytes(bytes.len() as u64));
group.bench_function("MeasurementConfig::goto_logical", |b| {
b.iter(|| black_box(MeasurementConfig::new(&bytes).goto_logical(Point::MAX)))
group.bench_function("basic", |b| {
b.iter(|| ucd::MeasurementConfig::new(&bytes).goto_logical(Point::MAX))
});
group.bench_function("MeasurementConfig::goto_logical with word wrap", |b| {
group.bench_function("word_wrap", |b| {
b.iter(|| {
black_box(
MeasurementConfig::new(&bytes)
ucd::MeasurementConfig::new(&bytes)
.with_word_wrap_column(50)
.goto_logical(Point::MAX),
)
.goto_logical(Point::MAX)
})
});
group.finish();
let mut group = c.benchmark_group("memchr::memchr2");
let mut buffer = [0u8; 8192];
for &size in &[0usize, 8, 64, 4096] {
group.throughput(Throughput::Bytes(size as u64 + 1));
group.bench_with_input(BenchmarkId::from_parameter(size), &size, |b, &size| {
buffer.fill(b'a');
buffer[size] = b'\n';
b.iter(|| memchr::memchr2(b'\n', b'\r', &buffer[..size], 0));
});
}
group.finish();
}
criterion_group!(benches, bench);

View file

@ -632,34 +632,67 @@ impl Bitmap {
}
/// Composites `src` over `dst` and returns the blended pixel.
///
/// Both pixels are packed `u32`s with alpha in bits 24..32, blue in bits 16..24,
/// green in bits 8..16 and red in bits 0..8. The color channels are converted with
/// `srgb_to_linear` before blending and converted back with `linear_to_srgb`
/// afterwards; alpha is only rescaled from 0..=255 to 0.0..=1.0.
//
// NOTE(review): this listing appears to interleave the pre-change and post-change
// lines of the commit, which is why most bindings below are introduced twice.
fn alpha_blend(dst: u32, src: u32) -> u32 {
let src_r = Self::srgb_to_linear(src & 0xff);
let src_g = Self::srgb_to_linear((src >> 8) & 0xff);
let src_b = Self::srgb_to_linear((src >> 16) & 0xff);
let src_a = (src >> 24) as f32 / 255.0f32;
let src_a = (src >> 24) as f32 * (1.0 / 255.0);
let src_b = Self::srgb_to_linear(src >> 16);
let src_g = Self::srgb_to_linear(src >> 8);
let src_r = Self::srgb_to_linear(src);
let dst_r = Self::srgb_to_linear(dst & 0xff);
let dst_g = Self::srgb_to_linear((dst >> 8) & 0xff);
let dst_b = Self::srgb_to_linear((dst >> 16) & 0xff);
let dst_a = (dst >> 24) as f32 / 255.0f32;
let dst_a = (dst >> 24) as f32 * (1.0 / 255.0);
let dst_b = Self::srgb_to_linear(dst >> 16);
let dst_g = Self::srgb_to_linear(dst >> 8);
let dst_r = Self::srgb_to_linear(dst);
let out_a = src_a + dst_a * (1.0f32 - src_a);
let out_r = (src_r * src_a + dst_r * dst_a * (1.0f32 - src_a)) / out_a;
let out_g = (src_g * src_a + dst_g * dst_a * (1.0f32 - src_a)) / out_a;
let out_b = (src_b * src_a + dst_b * dst_a * (1.0f32 - src_a)) / out_a;
let out_a = src_a + dst_a * (1.0 - src_a);
// The formula is technically:
// (src_bgr * src_a + dst_bgr * dst_a * (1 - src_a)) / out_a
// but we can merge the division of out_a with the two preceding terms.
// This saves us a bunch of operations that cannot be optimized away otherwise.
//
// NOTE(review): when both alphas are 0, `out_a` is 0.0, the reciprocal below is
// infinite and the color terms become NaN (0 * inf). What gets packed in that case
// depends on `linear_to_srgb`, which is not visible here - confirm.
let out_a_inv = 1.0 / out_a;
let src_mul = src_a * out_a_inv;
let dst_mul = dst_a * (1.0 - src_a) * out_a_inv;
let out_b = src_b * src_mul + dst_b * dst_mul;
let out_g = src_g * src_mul + dst_g * dst_mul;
let out_r = src_r * src_mul + dst_r * dst_mul;
(((out_a * 255.0f32) as u32) << 24)
| (Self::linear_to_srgb(out_b) << 16)
| (Self::linear_to_srgb(out_g) << 8)
| Self::linear_to_srgb(out_r)
let out_b = Self::linear_to_srgb(out_b);
let out_g = Self::linear_to_srgb(out_g);
let out_r = Self::linear_to_srgb(out_r);
// Repack in the input layout: alpha << 24 | blue << 16 | green << 8 | red.
// The `as u32` cast drops the fractional part of the scaled alpha (no rounding).
(((out_a * 255.0f32) as u32) << 24) | (out_b << 16) | (out_g << 8) | out_r
}
/// Converts one 8-bit sRGB channel value to its linear equivalent in 0.0..=1.0.
///
/// The table lookup at the end only uses the low 8 bits of `c` (`c & 0xff`), so a
/// caller such as `alpha_blend` may pass a shifted pixel value without masking it.
//
// NOTE(review): this listing appears to contain both the pre-change formula body
// and the post-change lookup table of the commit.
fn srgb_to_linear(c: u32) -> f32 {
let fc = c as f32 / 255.0f32;
if fc <= 0.04045f32 {
fc / 12.92f32
} else {
((fc + 0.055f32) / 1.055f32).powf(2.4f32)
}
// Generated using:
// ```rs
// let fc = c as f32 / 255.0;
// if fc <= 0.04045 {
// fc / 12.92
// } else {
// ((fc + 0.055) / 1.055).powf(2.4)
// }
// ```
// I'd love to use hex floats, but for some reason Rust maintainers decided against it...
//
// `rustfmt::skip` keeps the table laid out as 16 rows of 16 entries.
// The clippy allow is presumably needed because the literals are written with
// more decimal digits than an `f32` can represent exactly.
#[rustfmt::skip]
#[allow(clippy::excessive_precision)]
const LUT: [f32; 256] = [
0.0000000000, 0.0003035270, 0.0006070540, 0.0009105810, 0.0012141080, 0.0015176350, 0.0018211619, 0.0021246888, 0.0024282159, 0.0027317430, 0.0030352699, 0.0033465356, 0.0036765069, 0.0040247170, 0.0043914421, 0.0047769533,
0.0051815170, 0.0056053917, 0.0060488326, 0.0065120910, 0.0069954102, 0.0074990317, 0.0080231922, 0.0085681248, 0.0091340570, 0.0097212177, 0.0103298230, 0.0109600937, 0.0116122449, 0.0122864870, 0.0129830306, 0.0137020806,
0.0144438436, 0.0152085144, 0.0159962922, 0.0168073755, 0.0176419523, 0.0185002182, 0.0193823613, 0.0202885624, 0.0212190095, 0.0221738834, 0.0231533647, 0.0241576303, 0.0251868572, 0.0262412224, 0.0273208916, 0.0284260381,
0.0295568332, 0.0307134409, 0.0318960287, 0.0331047624, 0.0343398079, 0.0356013142, 0.0368894450, 0.0382043645, 0.0395462364, 0.0409151986, 0.0423114114, 0.0437350273, 0.0451862030, 0.0466650836, 0.0481718220, 0.0497065634,
0.0512694679, 0.0528606549, 0.0544802807, 0.0561284944, 0.0578054339, 0.0595112406, 0.0612460710, 0.0630100295, 0.0648032799, 0.0666259527, 0.0684781820, 0.0703601092, 0.0722718611, 0.0742135793, 0.0761853904, 0.0781874284,
0.0802198276, 0.0822827145, 0.0843762159, 0.0865004659, 0.0886556059, 0.0908417329, 0.0930589810, 0.0953074843, 0.0975873619, 0.0998987406, 0.1022417471, 0.1046164930, 0.1070231125, 0.1094617173, 0.1119324341, 0.1144353822,
0.1169706732, 0.1195384338, 0.1221387982, 0.1247718409, 0.1274376959, 0.1301364899, 0.1328683347, 0.1356333494, 0.1384316236, 0.1412633061, 0.1441284865, 0.1470272839, 0.1499598026, 0.1529261619, 0.1559264660, 0.1589608639,
0.1620294005, 0.1651322246, 0.1682693958, 0.1714410931, 0.1746473908, 0.1778884083, 0.1811642349, 0.1844749898, 0.1878207624, 0.1912016720, 0.1946178079, 0.1980693042, 0.2015562356, 0.2050787061, 0.2086368501, 0.2122307271,
0.2158605307, 0.2195262313, 0.2232279778, 0.2269658893, 0.2307400703, 0.2345506549, 0.2383976579, 0.2422811985, 0.2462013960, 0.2501583695, 0.2541521788, 0.2581829131, 0.2622507215, 0.2663556635, 0.2704978585, 0.2746773660,
0.2788943350, 0.2831487954, 0.2874408960, 0.2917706966, 0.2961383164, 0.3005438447, 0.3049873710, 0.3094689548, 0.3139887452, 0.3185468316, 0.3231432438, 0.3277781308, 0.3324515820, 0.3371636569, 0.3419144452, 0.3467040956,
0.3515326977, 0.3564002514, 0.3613068759, 0.3662526906, 0.3712377846, 0.3762622178, 0.3813261092, 0.3864295185, 0.3915725648, 0.3967553079, 0.4019778669, 0.4072403014, 0.4125427008, 0.4178851545, 0.4232677519, 0.4286905527,
0.4341537058, 0.4396572411, 0.4452012479, 0.4507858455, 0.4564110637, 0.4620770514, 0.4677838385, 0.4735315442, 0.4793202281, 0.4851499796, 0.4910208881, 0.4969330430, 0.5028865933, 0.5088814497, 0.5149177909, 0.5209956765,
0.5271152258, 0.5332764983, 0.5394796133, 0.5457245708, 0.5520114899, 0.5583404899, 0.5647116303, 0.5711249113, 0.5775805116, 0.5840784907, 0.5906189084, 0.5972018838, 0.6038274169, 0.6104956269, 0.6172066331, 0.6239604354,
0.6307572126, 0.6375969648, 0.6444797516, 0.6514056921, 0.6583748460, 0.6653873324, 0.6724432111, 0.6795425415, 0.6866854429, 0.6938719153, 0.7011020184, 0.7083759308, 0.7156936526, 0.7230552435, 0.7304608822, 0.7379105687,
0.7454043627, 0.7529423237, 0.7605246305, 0.7681512833, 0.7758223414, 0.7835379243, 0.7912980318, 0.7991028428, 0.8069523573, 0.8148466945, 0.8227858543, 0.8307699561, 0.8387991190, 0.8468732834, 0.8549926877, 0.8631572723,
0.8713672161, 0.8796223402, 0.8879231811, 0.8962693810, 0.9046613574, 0.9130986929, 0.9215820432, 0.9301108718, 0.9386858940, 0.9473065734, 0.9559735060, 0.9646862745, 0.9734454751, 0.9822505713, 0.9911022186, 1.0000000000,
];
// Mask to 8 bits so the index always falls inside the 256-entry table.
LUT[(c & 0xff) as usize]
}
fn linear_to_srgb(c: f32) -> u32 {

View file

@ -483,6 +483,7 @@ impl<'doc> MeasurementConfig<'doc> {
}
}
#[inline]
fn calc_target_x(target: Point, pos_y: CoordType) -> CoordType {
match pos_y.cmp(&target.y) {
std::cmp::Ordering::Less => CoordType::MAX,

View file

@ -35,6 +35,8 @@ impl<'a> Utf8Chars<'a> {
self.offset < self.source.len()
}
// I found that on mixed 50/50 English/Non-English text,
// performance actually suffers when this gets inlined.
#[cold]
fn next_slow(&mut self, c: u8) -> char {
// See: https://datatracker.ietf.org/doc/html/rfc3629
@ -197,6 +199,9 @@ impl<'a> Utf8Chars<'a> {
impl Iterator for Utf8Chars<'_> {
type Item = char;
// At opt-level="s", this function doesn't get inlined,
// but performance greatly suffers in that case.
#[inline]
fn next(&mut self) -> Option<Self::Item> {
if self.offset >= self.source.len() {
return None;