mirror of
https://github.com/microsoft/edit.git
synced 2025-07-03 14:33:22 +00:00
Initial import
This commit is contained in:
commit
f654981a07
37 changed files with 15264 additions and 0 deletions
48
.github/workflows/build.yml
vendored
Normal file
48
.github/workflows/build.yml
vendored
Normal file
|
@ -0,0 +1,48 @@
|
||||||
|
name: build

on:
  push:
    branches:
      - main

env:
  CARGO_TERM_COLOR: always

jobs:
  build:
    runs-on: windows-2022
    strategy:
      matrix:
        toolchain:
          - nightly
        arch:
          - x64
          - arm64
    steps:
      # The Windows runners have autocrlf enabled by default.
      - name: Disable git autocrlf
        run: git config --global core.autocrlf false
      - name: Checkout
        uses: actions/checkout@v4
      # Install the toolchain selected by the matrix rather than a hard-coded
      # name, so that editing `matrix.toolchain` changes what gets installed.
      - name: Install ${{ matrix.toolchain }}
        run: |
          rustup toolchain install --no-self-update --profile minimal --component rust-src -- ${{ matrix.toolchain }}
          rustup default ${{ matrix.toolchain }}
          rustup target add ${{ matrix.arch == 'arm64' && 'aarch64-pc-windows-msvc' || 'x86_64-pc-windows-msvc' }}
      # Tests are only run for the x64 matrix entry.
      - name: Test
        if: matrix.arch == 'x64'
        run: cargo test
      # arm64 is cross-compiled by passing an explicit target to the build script.
      - name: Build
        run: |
          if ("${{ matrix.arch }}" -eq "arm64") {
            .\tools\build_release_windows.bat --target aarch64-pc-windows-msvc
          } else {
            .\tools\build_release_windows.bat
          }
      # Upload the executable together with its debug symbols.
      - name: Upload
        uses: actions/upload-artifact@v4
        with:
          name: Windows ${{ matrix.arch }}
          path: |
            ${{ github.workspace }}/target/${{ matrix.arch == 'arm64' && 'aarch64-pc-windows-msvc/release' || 'release' }}/edit.exe
            ${{ github.workspace }}/target/${{ matrix.arch == 'arm64' && 'aarch64-pc-windows-msvc/release' || 'release' }}/edit.pdb
|
8
.gitignore
vendored
Normal file
8
.gitignore
vendored
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
.idea
|
||||||
|
.vs
|
||||||
|
*.user
|
||||||
|
bin
|
||||||
|
CMakeSettings.json
|
||||||
|
obj
|
||||||
|
out
|
||||||
|
target
|
17
.vscode/launch.json
vendored
Normal file
17
.vscode/launch.json
vendored
Normal file
|
@ -0,0 +1,17 @@
|
||||||
|
{
|
||||||
|
"version": "0.2.0",
|
||||||
|
"configurations": [
|
||||||
|
{
|
||||||
|
"name": "Launch Debug",
|
||||||
|
"preLaunchTask": "rust: cargo build",
|
||||||
|
"type": "cppvsdbg",
|
||||||
|
"request": "launch",
|
||||||
|
"console": "externalTerminal",
|
||||||
|
"program": "${workspaceFolder}/target/debug/edit",
|
||||||
|
"args": [
|
||||||
|
"${workspaceFolder}/README.md"
|
||||||
|
],
"cwd": "${workspaceFolder}"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
24
.vscode/tasks.json
vendored
Normal file
24
.vscode/tasks.json
vendored
Normal file
|
@ -0,0 +1,24 @@
|
||||||
|
{
|
||||||
|
"version": "2.0.0",
|
||||||
|
"tasks": [
|
||||||
|
{
|
||||||
|
"label": "rust: cargo build",
|
||||||
|
"type": "process",
|
||||||
|
"command": "cargo",
|
||||||
|
"args": [
|
||||||
|
"build",
|
||||||
|
"--package",
|
||||||
|
"edit",
|
||||||
|
"--features",
|
||||||
|
"debug-latency"
|
||||||
|
],
|
||||||
|
"group": {
|
||||||
|
"kind": "build",
|
||||||
|
"isDefault": true
|
||||||
|
},
|
||||||
|
"problemMatcher": [
|
||||||
|
"$rustc"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
90
Cargo.lock
generated
Normal file
90
Cargo.lock
generated
Normal file
|
@ -0,0 +1,90 @@
|
||||||
|
# This file is automatically @generated by Cargo.
|
||||||
|
# It is not intended for manual editing.
|
||||||
|
version = 4
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "edit"
|
||||||
|
version = "0.1.0"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
"windows-sys",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "libc"
|
||||||
|
version = "0.2.171"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows-sys"
|
||||||
|
version = "0.59.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
|
||||||
|
dependencies = [
|
||||||
|
"windows-targets",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows-targets"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
|
||||||
|
dependencies = [
|
||||||
|
"windows_aarch64_gnullvm",
|
||||||
|
"windows_aarch64_msvc",
|
||||||
|
"windows_i686_gnu",
|
||||||
|
"windows_i686_gnullvm",
|
||||||
|
"windows_i686_msvc",
|
||||||
|
"windows_x86_64_gnu",
|
||||||
|
"windows_x86_64_gnullvm",
|
||||||
|
"windows_x86_64_msvc",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_aarch64_gnullvm"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_aarch64_msvc"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_i686_gnu"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_i686_gnullvm"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_i686_msvc"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_x86_64_gnu"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_x86_64_gnullvm"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_x86_64_msvc"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
|
34
Cargo.toml
Normal file
34
Cargo.toml
Normal file
|
@ -0,0 +1,34 @@
|
||||||
|
[package]
|
||||||
|
name = "edit"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2024"
|
||||||
|
|
||||||
|
[features]
|
||||||
|
debug-layout = []
|
||||||
|
debug-latency = []
|
||||||
|
|
||||||
|
[profile.release]
|
||||||
|
codegen-units = 1
|
||||||
|
debug = "full"
|
||||||
|
lto = true
|
||||||
|
panic = "abort"
|
||||||
|
debug-assertions = true # Temporary while I test this
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
|
||||||
|
[target.'cfg(unix)'.dependencies]
|
||||||
|
libc = "0.2"
|
||||||
|
|
||||||
|
[target.'cfg(windows)'.dependencies.windows-sys]
|
||||||
|
version = "0.59"
|
||||||
|
features = [
|
||||||
|
"Win32_Globalization",
|
||||||
|
"Win32_Security",
|
||||||
|
"Win32_Storage_FileSystem",
|
||||||
|
"Win32_System_Console",
|
||||||
|
"Win32_System_Diagnostics_Debug",
|
||||||
|
"Win32_System_IO",
|
||||||
|
"Win32_System_LibraryLoader",
|
||||||
|
"Win32_System_Memory",
|
||||||
|
"Win32_System_Threading",
|
||||||
|
]
|
21
LICENSE
Normal file
21
LICENSE
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
MIT License
|
||||||
|
|
||||||
|
Copyright (c) Microsoft Corporation. All rights reserved.
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
3
README.md
Normal file
3
README.md
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
# MS-DOS Editor Redux
|
||||||
|
|
||||||
|
TBA
|
26
assets/Microsoft_logo_(1980).svg
Normal file
26
assets/Microsoft_logo_(1980).svg
Normal file
|
@ -0,0 +1,26 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Source: https://commons.wikimedia.org/wiki/File:Microsoft_logo_(1980).svg -->
<!-- License: Public domain -->
|
||||||
|
<svg xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://creativecommons.org/ns#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:svg="http://www.w3.org/2000/svg" xmlns="http://www.w3.org/2000/svg" id="svg8" version="1.1" viewBox="0 0 264.58333 52.916669" height="200" width="1000">
|
||||||
|
<defs id="defs2"/>
|
||||||
|
<metadata id="metadata5">
|
||||||
|
<rdf:RDF>
|
||||||
|
<cc:Work rdf:about="">
|
||||||
|
<dc:format>image/svg+xml</dc:format>
|
||||||
|
<dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage"/>
|
||||||
|
<dc:title/>
|
||||||
|
</cc:Work>
|
||||||
|
</rdf:RDF>
|
||||||
|
</metadata>
|
||||||
|
<g id="layer2">
|
||||||
|
<path style="fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" d="M 0,52.916667 33.602084,20.902084 V 34.925001 L 48.418751,20.902084 v 13.758334 h 8.73125 V 0.26458334 L 42.333334,15.08125 V 0.26458334 L 0,42.597917 Z" id="path847"/>
|
||||||
|
<path style="fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" d="M 67.468752,0.26458334 58.737501,9.2604169 V 34.660418 h 8.731251 z" id="path849"/>
|
||||||
|
<path transform="scale(0.26458334)" d="m 301.16016,1 c -21.9507,4.4255933 -39.58425,23.383151 -45.24024,48 H 255 V 53.673828 78.277344 82 h 0.69727 c 5.39479,25.07886 23.17116,44.48439 45.38085,49 H 343 v -30 h -20 v -0.004 C 322.83335,100.999 322.66667,101 322.5,101 303.44618,101 288,85.553824 288,66.5 288,47.446176 303.44618,32 322.5,32 H 342 L 372,1 Z" style="fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.567005;stop-color:#000000" id="path848"/>
|
||||||
|
<path transform="scale(0.26458334)" d="m 383,1 -33,34 v 96 h 33 V 33 h 18.5 c 9.66498,0 17.5,7.835017 17.5,17.5 0,9.664983 -7.83502,17.5 -17.5,17.5 H 387 L 521,199 V 157 L 487,123 443.33594,78.365234 A 47.000001,50 0 0 0 451,51 47.000001,50 0 0 0 405.00977,1.0117188 L 405,1 Z" style="display:inline;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.999999px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" id="path864"/>
|
||||||
|
<path transform="scale(0.26458334)" d="M 525.86523,1 A 68,66.499996 0 0 0 458,67.5 68,66.499996 0 0 0 526,134 68,66.499996 0 0 0 594,67.5 68,66.499996 0 0 0 526,1 68,66.499996 0 0 0 525.86523,1 Z m -1.60546,31 A 36.499998,36.000002 0 0 1 524.5,32 36.499998,36.000002 0 0 1 561,68 36.499998,36.000002 0 0 1 524.5,104 36.499998,36.000002 0 0 1 488,68 36.499998,36.000002 0 0 1 524.25977,32 Z" style="display:inline;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.567001;stop-color:#000000" id="path881"/>
|
||||||
|
<path transform="scale(0.26458334)" d="m 620.5,1 c -22.36753,-5.5e-7 -40.5,18.132467 -40.5,40.5 0,22.367533 18.13247,40.500001 40.5,40.5 h 2.5 c 11.59798,0 21,4.477153 21,10 0,5.522847 -9.40202,10 -21,10 h -40 v 29 h 62.99999 c 18.43887,-4.06734 31.56367,-20.13433 31.56446,-38.605479 C 677.56392,78.310576 669.87456,65.292316 657.38281,58.226562 640.78385,50.357003 632.38254,48.035667 620.15625,48 615.01343,46.201489 612.00162,43.42696 612,40.486328 611.9995,36.896878 616.47115,33.613784 623.55859,32 H 677 C 686.99999,16.999999 695.99998,9 709,1 h -84 z" style="font-variation-settings:normal;opacity:1;vector-effect:none;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.567001;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;stop-color:#000000;stop-opacity:1" id="rect942"/>
|
||||||
|
<path transform="scale(0.26458334)" d="M 743,0 A 68.999999,68.500001 0 0 0 674,68.5 68.999999,68.500001 0 0 0 743,137 68.999999,68.500001 0 0 0 812,68.5 68.999999,68.500001 0 0 0 743,0 Z m 0.5,32 A 37.499999,36.500002 0 0 1 781,68.5 37.499999,36.500002 0 0 1 743.5,105 37.499999,36.500002 0 0 1 706,68.5 37.499999,36.500002 0 0 1 743.5,32 Z" style="font-variation-settings:normal;opacity:1;vector-effect:none;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.567001;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;stop-color:#000000;stop-opacity:1" id="path881-7"/>
|
||||||
|
<path id="path1086" d="m 232.03959,22.754167 v -8.73125 h -8.46667 V 8.9958336 h 9.26042 l 8.73125,-8.73125026 H 223.83751 L 214.84167,9.2604169 V 52.652085 l 8.73125,-7.672917 V 22.754167 Z" style="fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"/>
|
||||||
|
<path style="fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.264583px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" d="m 251.88334,8.9958335 3.70414,-2e-7 8.7313,-8.73125014 h -20.10839 l -8.73122,8.73125034 h 7.67292 V 34.660417 l 8.7312,-7.672917 z" id="path1086-2"/>
|
||||||
|
</g>
|
||||||
|
</svg>
|
BIN
assets/microsoft.png
Normal file
BIN
assets/microsoft.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 5.6 KiB |
1
assets/microsoft.sixel
Normal file
1
assets/microsoft.sixel
Normal file
|
@ -0,0 +1 @@
|
||||||
|
P;1q"1;1;300;60#0;2;100;100;100#0!42?_ow{}!12?_ow{}!6?_ow{}}!5?_ow{{}}}!17~^NFbpw{}!8~!4}{wwo_!12?_oow{{{!4}!6~!4}{{wwo__!4?_ow{{}}}!23~^Nfrxw{{}}}!9~!4}{{woo_!12?_ow{}!15~^NFbpw{}!17~^NFB@-!36?_ow{}!6~!6?_ow{}!6~??w{}!7~?o{}!10~^^!10NFBpw{}!6~!8N^!9~{_!4?_o{}!8~^^!9N^^!9~{w}!8~^!18NFbx{}!9~^^!8N^^!9~}{o???ow{}!6~!11NFB@GKM!5N!10~!4NFB@-!30?_ow{}!12~_ow{}!12~??!20~FB@!15?!10~!10?r!9~???{!8~NB@!15?@FN!16~!4{!4wooo__!5?_}!8~^FB!16?@F^!8~{o!10~!9o!13?!10~-!24?_ow{}!35~??!19~x!18?!10~?CK[!4{}!9~^B??N!8~x!21?!10~N^^!18~}{o!10~!22?!29~!13?!10~-!18?_ow{}!8~^NFB@?!11~^NFB@?!10~??!10~F!9~}{wo__!12?!10~!5?@BFN^!9~}{wof^!7~}wo__!11?__o{!9~N@!7?!6@Bb!10~N!9~{o__!12?__o{}!8~F@!10~!9B!13?!10~-!12?_ow{}!8~^NFB@!7?!5~^NFB@!7?!10~??!10~??@FN^!20~??!10~!11?@BFN^!23~!7}!10~^NFB~!12}!12~^NB??BFN^!9~!10}!9~^NF@???!10~!22?!5~^NFB@-!6?_ow{}!8~^NFB@!13?FFB@!13?!10F??!10F!7?@@BB!15F??!10F!17?@BFN^!10~|zrfFF!10NFFFBB@@!5?!21FBB@!11?@BBFFNNN!10^NNNFFBB@!8?!10~!22?NFB@-_ow{}!8~^NFB@!119?@BFN^!9~}{wo!88?!10~-!7~^NFB@!131?@BFN^!7~!88?!7~^NF-~^NFB@!143?@BFN^~!88?~^NFB@\
|
12
build.rs
Normal file
12
build.rs
Normal file
|
@ -0,0 +1,12 @@
|
||||||
|
/// Build script entry point.
///
/// When the target OS is `windows` and the target environment is `msvc`,
/// this prints the cargo directives that make the linker embed
/// `src/edit.exe.manifest` into the `edit` binary and pass `/WX` to it.
/// For every other target nothing is printed.
fn main() {
    // A missing variable is treated like an empty string.
    let target_cfg = |key: &str| std::env::var(key).unwrap_or_default();

    if target_cfg("CARGO_CFG_TARGET_OS") != "windows"
        || target_cfg("CARGO_CFG_TARGET_ENV") != "msvc"
    {
        return;
    }

    // Resolve the manifest to an absolute path before handing it to the linker.
    let manifest = std::path::absolute("src/edit.exe.manifest").unwrap();
    let manifest = manifest.to_str().unwrap();

    println!("cargo::rerun-if-changed=src/edit.exe.manifest");
    println!("cargo::rustc-link-arg-bin=edit=/MANIFEST:EMBED");
    println!("cargo::rustc-link-arg-bin=edit=/MANIFESTINPUT:{manifest}");
    println!("cargo::rustc-link-arg-bin=edit=/WX");
}
|
80
src/apperr.rs
Normal file
80
src/apperr.rs
Normal file
|
@ -0,0 +1,80 @@
|
||||||
|
use crate::sys;
|
||||||
|
use std::num::NonZeroU32;
|
||||||
|
use std::{fmt, io, result};
|
||||||
|
|
||||||
|
// Remember to add an entry to `Error::message()` for each new error.
|
||||||
|
pub const APP_ICU_MISSING: Error = Error::new_app(1);
|
||||||
|
pub const APP_FILE_NOT_FOUND: Error = Error::new_app(2);
|
||||||
|
|
||||||
|
pub type Result<T> = result::Result<T, Error>;
|
||||||
|
|
||||||
|
/// An error value stored as a non-zero `u32`.
///
/// Errors created via `Error::new_app` / `Error::new_icu` carry a tag in the
/// upper 16 bits and the caller-supplied code in the lower 16 bits.
/// Other values are produced by the `sys` module.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||||
|
pub struct Error(NonZeroU32);
|
||||||
|
|
||||||
|
impl Error {
|
||||||
|
const FLAGS_MASK: u32 = 0xF8000000; // Top 5 bits
|
||||||
|
const FLAGS_CUSTOM_FAILURE: u32 = 0xA0000000;
|
||||||
|
|
||||||
|
const TAG_APP: u32 = Self::FLAGS_CUSTOM_FAILURE | (1 << 16);
|
||||||
|
const TAG_ICU: u32 = Self::FLAGS_CUSTOM_FAILURE | (2 << 16);
|
||||||
|
|
||||||
|
pub const unsafe fn new(code: u32) -> Self {
|
||||||
|
Error(unsafe { NonZeroU32::new_unchecked(code) })
|
||||||
|
}
|
||||||
|
|
||||||
|
pub const fn new_app(code: u32) -> Self {
|
||||||
|
debug_assert!(code > 0 && code <= 0xFFFF);
|
||||||
|
unsafe { Self::new(Self::TAG_APP | code) }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub const fn new_icu(code: u32) -> Self {
|
||||||
|
debug_assert!(code > 0 && code <= 0xFFFF);
|
||||||
|
unsafe { Self::new(Self::TAG_ICU | code) }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn is_app(&self) -> bool {
|
||||||
|
(self.0.get() & 0xFFFF0000) == Self::TAG_APP
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn is_icu(&self) -> bool {
|
||||||
|
(self.0.get() & 0xFFFF0000) == Self::TAG_ICU
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn code(&self) -> u32 {
|
||||||
|
self.0.get() & 0xFFFF
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn value(&self) -> u32 {
|
||||||
|
self.0.get()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn message(self) -> String {
|
||||||
|
match self {
|
||||||
|
APP_ICU_MISSING => "ICU not found".to_string(),
|
||||||
|
APP_FILE_NOT_FOUND => "File not found".to_string(),
|
||||||
|
_ => {
|
||||||
|
debug_assert!(!self.is_app());
|
||||||
|
if self.is_icu() {
|
||||||
|
format!("ICU Error {:#08x}", self.code())
|
||||||
|
} else {
|
||||||
|
sys::format_error(self)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Display for Error {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
|
write!(f, "{:#08x}", self.0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<io::Error> for Error {
|
||||||
|
fn from(err: io::Error) -> Self {
|
||||||
|
match err.kind() {
|
||||||
|
io::ErrorKind::NotFound => APP_FILE_NOT_FOUND,
|
||||||
|
_ => sys::io_error_to_apperr(err),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
2299
src/buffer.rs
Normal file
2299
src/buffer.rs
Normal file
File diff suppressed because it is too large
Load diff
22
src/edit.exe.manifest
Normal file
22
src/edit.exe.manifest
Normal file
|
@ -0,0 +1,22 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||||
|
<assembly
|
||||||
|
xmlns="urn:schemas-microsoft-com:asm.v1"
|
||||||
|
xmlns:asmv3="urn:schemas-microsoft-com:asm.v3"
|
||||||
|
xmlns:cv1="urn:schemas-microsoft-com:compatibility.v1"
|
||||||
|
xmlns:ws2="http://schemas.microsoft.com/SMI/2016/WindowsSettings"
|
||||||
|
xmlns:ws3="http://schemas.microsoft.com/SMI/2019/WindowsSettings"
|
||||||
|
xmlns:ws4="http://schemas.microsoft.com/SMI/2020/WindowsSettings"
|
||||||
|
manifestVersion="1.0">
|
||||||
|
<asmv3:application>
|
||||||
|
<windowsSettings>
|
||||||
|
<ws2:longPathAware>true</ws2:longPathAware>
|
||||||
|
<ws3:activeCodePage>UTF-8</ws3:activeCodePage>
|
||||||
|
<ws4:heapType>SegmentHeap</ws4:heapType>
|
||||||
|
</windowsSettings>
|
||||||
|
</asmv3:application>
|
||||||
|
<cv1:compatibility>
|
||||||
|
<application>
|
||||||
|
<supportedOS Id="{8e0f7a12-bfb3-4fe8-b9a5-48fd50a15a9a}"/>
|
||||||
|
</application>
|
||||||
|
</cv1:compatibility>
|
||||||
|
</assembly>
|
536
src/framebuffer.rs
Normal file
536
src/framebuffer.rs
Normal file
|
@ -0,0 +1,536 @@
|
||||||
|
use crate::helpers::{CoordType, Point, Rect, Size};
|
||||||
|
use crate::{helpers, ucd};
|
||||||
|
use std::fmt::Write;
|
||||||
|
|
||||||
|
/// Names for the entries of an indexed color table.
///
/// The discriminant of each variant is used as an array index
/// (`IndexedColor::X as usize`), so the order must stay in sync with
/// `INDEXED_COLORS_COUNT` and `DEFAULT_THEME`.
pub enum IndexedColor {
    // The 8 regular colors.
    Black = 0,
    Red = 1,
    Green = 2,
    Yellow = 3,
    Blue = 4,
    Magenta = 5,
    Cyan = 6,
    White = 7,

    // The 8 bright colors.
    BrightBlack = 8,
    BrightRed = 9,
    BrightGreen = 10,
    BrightYellow = 11,
    BrightBlue = 12,
    BrightMagenta = 13,
    BrightCyan = 14,
    BrightWhite = 15,

    // The default background/foreground pair.
    DefaultBackground = 16,
    DefaultForeground = 17,
}
|
||||||
|
|
||||||
|
pub const INDEXED_COLORS_COUNT: usize = 18;
|
||||||
|
|
||||||
|
/// The color table a new `Framebuffer` starts with, indexed by `IndexedColor`.
///
/// NOTE(review): the values look like `0xAABBGGRR` (e.g. `Red` is `0xff212cbe`)
/// — confirm the channel order against the code that consumes them.
pub const DEFAULT_THEME: [u32; INDEXED_COLORS_COUNT] = [
|
||||||
|
0xff000000, 0xff212cbe, 0xff3aae3f, 0xff4a9abe, 0xffbe4d20, 0xffbe54bb, 0xffb2a700, 0xffbebebe,
|
||||||
|
0xff808080, 0xff303eff, 0xff51ea58, 0xff44c9ff, 0xffff6a2f, 0xffff74fc, 0xfff0e100, 0xffffffff,
|
||||||
|
0xff000000, 0xffffffff,
|
||||||
|
];
|
||||||
|
|
||||||
|
/// An off-screen representation of the terminal contents.
///
/// It stores one `String` per row (`lines`), a background and a foreground
/// color bitmap with `width * height` entries each, the indexed color table,
/// and the cursor state.
pub struct Framebuffer {
|
||||||
|
indexed_colors: [u32; INDEXED_COLORS_COUNT],
|
||||||
|
size: Size,
|
||||||
|
lines: Vec<String>,
|
||||||
|
bg_bitmap: Vec<u32>,
|
||||||
|
fg_bitmap: Vec<u32>,
|
||||||
|
auto_colors: [u32; 2], // [dark, light]
|
||||||
|
cursor: Point,
|
||||||
|
cursor_overtype: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Framebuffer {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self {
|
||||||
|
indexed_colors: DEFAULT_THEME,
|
||||||
|
size: Size::default(),
|
||||||
|
lines: Vec::new(),
|
||||||
|
bg_bitmap: Vec::new(),
|
||||||
|
fg_bitmap: Vec::new(),
|
||||||
|
auto_colors: [0, 0],
|
||||||
|
cursor: Point { x: -1, y: -1 },
|
||||||
|
cursor_overtype: false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn set_indexed_colors(&mut self, colors: [u32; INDEXED_COLORS_COUNT]) {
|
||||||
|
self.indexed_colors = colors;
|
||||||
|
|
||||||
|
self.auto_colors = [
|
||||||
|
self.indexed_colors[IndexedColor::Black as usize],
|
||||||
|
self.indexed_colors[IndexedColor::BrightWhite as usize],
|
||||||
|
];
|
||||||
|
if !Self::quick_is_dark(self.auto_colors[0]) {
|
||||||
|
self.auto_colors.swap(0, 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn reset(&mut self, size: Size) {
|
||||||
|
let width = size.width as usize;
|
||||||
|
|
||||||
|
if size != self.size {
|
||||||
|
let height = size.height as usize;
|
||||||
|
let area = width * height;
|
||||||
|
self.size = size;
|
||||||
|
self.lines = vec![String::new(); height];
|
||||||
|
self.bg_bitmap = vec![0; area];
|
||||||
|
self.fg_bitmap = vec![0; area];
|
||||||
|
}
|
||||||
|
|
||||||
|
let bg = self.indexed_colors[IndexedColor::DefaultBackground as usize];
|
||||||
|
self.bg_bitmap.fill(bg);
|
||||||
|
self.fg_bitmap.fill(0);
|
||||||
|
self.cursor = Point { x: -1, y: -1 };
|
||||||
|
|
||||||
|
for l in &mut self.lines {
|
||||||
|
l.clear();
|
||||||
|
l.reserve(width + width / 2);
|
||||||
|
helpers::string_append_repeat(l, ' ', width);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Replaces text contents in a single line of the framebuffer.
|
||||||
|
/// All coordinates are in viewport coordinates.
|
||||||
|
/// Assumes that all tabs have been replaced with spaces.
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
///
|
||||||
|
/// * `y` - The y-coordinate of the line to replace.
|
||||||
|
/// * `origin_x` - The x-coordinate where the text should be inserted.
|
||||||
|
/// * `clip_right` - The x-coordinate past which the text will be clipped.
|
||||||
|
/// * `text` - The text to insert.
|
||||||
|
///
|
||||||
|
/// # Returns
|
||||||
|
///
|
||||||
|
/// The rectangle that was updated.
|
||||||
|
// NOTE(review): when `origin_x` is negative, the body advances `cfg` past the
// clipped-off prefix, but the inserted string is later sliced from offset 0
// (`&text[..res_new.offset]`), and the wide-glyph branch adds a
// `visual_pos.x` to `left` a second time. Whether this is correct depends on
// whether `ucd::MeasurementConfig` reports positions relative to the start of
// the text or to the previous cursor — TODO confirm against `ucd`.
pub fn replace_text(
|
||||||
|
&mut self,
|
||||||
|
y: CoordType,
|
||||||
|
origin_x: CoordType,
|
||||||
|
clip_right: CoordType,
|
||||||
|
text: &str,
|
||||||
|
) -> Rect {
|
||||||
|
let Some(line) = self.lines.get_mut(y as usize) else {
|
||||||
|
return Rect::default();
|
||||||
|
};
|
||||||
|
|
||||||
|
let bytes = text.as_bytes();
|
||||||
|
let clip_right = clip_right.clamp(0, self.size.width);
|
||||||
|
let layout_width = clip_right - origin_x;
|
||||||
|
|
||||||
|
// Can't insert text that can't fit or is empty.
|
||||||
|
if layout_width <= 0 || bytes.is_empty() {
|
||||||
|
return Rect::default();
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut cfg = ucd::MeasurementConfig::new(&bytes);
|
||||||
|
|
||||||
|
// Check if the text intersects with the left edge of the framebuffer
|
||||||
|
// and figure out the parts that are inside.
|
||||||
|
let mut left = origin_x;
|
||||||
|
if left < 0 {
|
||||||
|
let cursor = cfg.goto_visual(Point { x: -left, y: 0 });
|
||||||
|
left += cursor.visual_pos.x;
|
||||||
|
|
||||||
|
if left < 0 && cursor.offset < text.len() {
|
||||||
|
// `-left` must've intersected a wide glyph. Go to the next one.
|
||||||
|
let cursor = cfg.goto_logical(Point {
|
||||||
|
x: cursor.logical_pos.x + 1,
|
||||||
|
y: 0,
|
||||||
|
});
|
||||||
|
left += cursor.visual_pos.x;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If the text still starts outside the framebuffer, we must've ran out of text above.
|
||||||
|
// Otherwise, if it starts outside the right edge to begin with, we can't insert it anyway.
|
||||||
|
if left < 0 || left >= clip_right {
|
||||||
|
return Rect::default();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Measure the width of the new text (= `res_new.visual_target.x`).
|
||||||
|
let res_new = cfg.goto_visual(Point {
|
||||||
|
x: layout_width,
|
||||||
|
y: 0,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Figure out at which byte offset the new text gets inserted.
|
||||||
|
let right = left + res_new.visual_pos.x;
|
||||||
|
let line_bytes = line.as_bytes();
|
||||||
|
let mut cfg_old = ucd::MeasurementConfig::new(&line_bytes);
|
||||||
|
let res_old_beg = cfg_old.goto_visual(Point { x: left, y: 0 });
|
||||||
|
let res_old_end = cfg_old.goto_visual(Point { x: right, y: 0 });
|
||||||
|
|
||||||
|
// If we intersect a wide glyph, we need to pad the new text with spaces.
|
||||||
|
let mut str_new = &text[..res_new.offset];
|
||||||
|
let mut str_buf = String::new();
|
||||||
|
let overlap_beg = res_old_beg.visual_pos.x - left;
|
||||||
|
let overlap_end = right - res_old_end.visual_pos.x;
|
||||||
|
if overlap_beg > 0 || overlap_end > 0 {
|
||||||
|
if overlap_beg > 0 {
|
||||||
|
helpers::string_append_repeat(&mut str_buf, ' ', overlap_beg as usize);
|
||||||
|
}
|
||||||
|
str_buf.push_str(str_new);
|
||||||
|
if overlap_end > 0 {
|
||||||
|
helpers::string_append_repeat(&mut str_buf, ' ', overlap_end as usize);
|
||||||
|
}
|
||||||
|
str_new = &str_buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
(*line).replace_range(res_old_beg.offset..res_old_end.offset, str_new);
|
||||||
|
|
||||||
|
Rect {
|
||||||
|
left,
|
||||||
|
top: y,
|
||||||
|
right,
|
||||||
|
bottom: y + 1,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn draw_scrollbar(
|
||||||
|
&mut self,
|
||||||
|
clip_rect: Rect,
|
||||||
|
track: Rect,
|
||||||
|
content_offset: CoordType,
|
||||||
|
content_height: CoordType,
|
||||||
|
) {
|
||||||
|
if track.is_empty() {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
let viewport_height = track.height();
|
||||||
|
// The content height is at least the viewport height.
|
||||||
|
let content_height = content_height.max(viewport_height);
|
||||||
|
// The content offset must be at least one viewport height from the bottom.
|
||||||
|
// You don't want to scroll past the end after all...
|
||||||
|
let content_offset = content_offset.clamp(0, content_height - viewport_height);
|
||||||
|
|
||||||
|
// In order to increase the visual resolution of the scrollbar,
|
||||||
|
// we'll use 1/8th blocks to represent the thumb.
|
||||||
|
// First, scale the offsets to get that 1/8th resolution.
|
||||||
|
let viewport_height = viewport_height as i64 * 8;
|
||||||
|
let content_offset = content_offset as i64 * 8;
|
||||||
|
let content_height = content_height as i64 * 8;
|
||||||
|
|
||||||
|
// The proportional thumb height (0-1) is the fraction of viewport and
|
||||||
|
// content height. The taller the content, the smaller the thumb:
|
||||||
|
// = viewport_height / content_height
|
||||||
|
//
|
||||||
|
// We then scale that to the viewport height to get the height in 1/8th units.
|
||||||
|
// = viewport_height * viewport_height / content_height
|
||||||
|
//
|
||||||
|
// We add content_height/2 to round the integer division, which results in a numerator of:
|
||||||
|
// = viewport_height * viewport_height + content_height / 2
|
||||||
|
//
|
||||||
|
// Finally we add +1 to round up the division if `content_height` is uneven. This ensures that
|
||||||
|
// in case of a rounding issue, we'll make the track too large and clamp it to the track size.
|
||||||
|
let thumb_numerator = viewport_height * viewport_height + content_height / 2 + 1;
|
||||||
|
let thumb_height = thumb_numerator / content_height;
|
||||||
|
// Ensure the thumb has a minimum size of 1 row.
|
||||||
|
let thumb_height = thumb_height.max(8);
|
||||||
|
|
||||||
|
// The proportional thumb top position (0-1) is naturally:
|
||||||
|
// = content_offset / content_height
|
||||||
|
//
|
||||||
|
// The bottom position is 1 viewport-height below the top position:
|
||||||
|
// = (viewport_height + content_offset) / content_height
|
||||||
|
//
|
||||||
|
// Since everything must be scaled to the 1/8th units we must multiply by viewport_height:
|
||||||
|
// = viewport_height * (viewport_height + content_offset) / content_height
|
||||||
|
// = viewport_height * viewport_height + viewport_height * content_offset / content_height
|
||||||
|
//
|
||||||
|
// And we also want that rounded integer division as before. This transforms the
|
||||||
|
// `viewport_height * viewport_height` portion into the `thumb_enumerator` above.
|
||||||
|
// = thumb_numerator + viewport_height * content_offset / content_height
|
||||||
|
//
|
||||||
|
let thumb_bottom = (viewport_height * content_offset + thumb_numerator) / content_height;
|
||||||
|
// Now that the bottom is flush with the bottom of the track, we can calculate the top.
|
||||||
|
let thumb_top = (thumb_bottom - thumb_height).max(0);
|
||||||
|
|
||||||
|
// Calculate the height of the top/bottom cell of the thumb.
|
||||||
|
let top_fract = (thumb_top % 8) as CoordType;
|
||||||
|
let bottom_fract = (thumb_bottom % 8) as CoordType;
|
||||||
|
|
||||||
|
// Shift to absolute coordinates.
|
||||||
|
let thumb_top = ((thumb_top + 7) / 8) as CoordType + track.top;
|
||||||
|
let thumb_bottom = (thumb_bottom / 8) as CoordType + track.top;
|
||||||
|
|
||||||
|
let track_clipped = track.intersect(clip_rect);
|
||||||
|
|
||||||
|
// Clamp to the visible area.
|
||||||
|
let thumb_top_clipped = thumb_top.max(track_clipped.top);
|
||||||
|
let thumb_bottom_clipped = thumb_bottom.min(track_clipped.bottom);
|
||||||
|
|
||||||
|
self.blend_bg(track_clipped, self.indexed(IndexedColor::BrightBlack));
|
||||||
|
self.blend_fg(track_clipped, self.indexed(IndexedColor::BrightWhite));
|
||||||
|
|
||||||
|
// Draw the full blocks.
|
||||||
|
for y in thumb_top_clipped..thumb_bottom_clipped {
|
||||||
|
self.replace_text(y, track_clipped.left, track_clipped.right, "█");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Draw the top/bottom cell of the thumb.
|
||||||
|
// U+2581 to U+2588, 1/8th block to 8/8th block elements glyphs: ▁▂▃▄▅▆▇█
|
||||||
|
// In UTF8: E2 96 81 to E2 96 88
|
||||||
|
let mut fract_buf = [0xE2, 0x96, 0x88];
|
||||||
|
if top_fract != 0 {
|
||||||
|
fract_buf[2] = (0x88 - top_fract) as u8;
|
||||||
|
self.replace_text(
|
||||||
|
thumb_top_clipped - 1,
|
||||||
|
track_clipped.left,
|
||||||
|
track_clipped.right,
|
||||||
|
unsafe { std::str::from_utf8_unchecked(&fract_buf) },
|
||||||
|
);
|
||||||
|
}
|
||||||
|
if bottom_fract != 0 {
|
||||||
|
fract_buf[2] = (0x88 - bottom_fract) as u8;
|
||||||
|
let rect = self.replace_text(
|
||||||
|
thumb_bottom_clipped,
|
||||||
|
track_clipped.left,
|
||||||
|
track_clipped.right,
|
||||||
|
unsafe { std::str::from_utf8_unchecked(&fract_buf) },
|
||||||
|
);
|
||||||
|
self.blend_bg(rect, self.indexed(IndexedColor::BrightWhite));
|
||||||
|
self.blend_fg(rect, self.indexed(IndexedColor::BrightBlack));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
pub fn indexed(&self, index: IndexedColor) -> u32 {
|
||||||
|
self.indexed_colors[index as usize]
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Blends a background color over the given rectangular area.
|
||||||
|
pub fn blend_bg(&mut self, target: Rect, bg: u32) {
|
||||||
|
Self::alpha_blend_rect(&mut self.bg_bitmap[..], target, self.size, bg);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Blends a foreground color over the given rectangular area.
|
||||||
|
pub fn blend_fg(&mut self, target: Rect, fg: u32) {
|
||||||
|
if fg != 0 {
|
||||||
|
Self::alpha_blend_rect(&mut self.fg_bitmap[..], target, self.size, fg);
|
||||||
|
} else {
|
||||||
|
self.blend_rect_auto(target);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Performs alpha blending on a rectangle inside the destination bitmap.
|
||||||
|
fn alpha_blend_rect(dst: &mut [u32], rect: Rect, size: Size, src: u32) {
|
||||||
|
let width = size.width;
|
||||||
|
let height = size.height;
|
||||||
|
let left = rect.left.clamp(0, width);
|
||||||
|
let right = rect.right.clamp(0, width);
|
||||||
|
let top = rect.top.clamp(0, height);
|
||||||
|
let bottom = rect.bottom.clamp(0, height);
|
||||||
|
|
||||||
|
if left >= right || top >= bottom {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (src & 0xff000000) == 0xff000000 {
|
||||||
|
for y in top..bottom {
|
||||||
|
let beg = (y * width + left) as usize;
|
||||||
|
let end = (y * width + right) as usize;
|
||||||
|
dst[beg..end].fill(src);
|
||||||
|
}
|
||||||
|
} else if (src & 0xff000000) != 0x00000000 {
|
||||||
|
for y in top..bottom {
|
||||||
|
let beg = (y * width + left) as usize;
|
||||||
|
let end = (y * width + right) as usize;
|
||||||
|
let mut off = beg;
|
||||||
|
|
||||||
|
while {
|
||||||
|
let color = dst[off];
|
||||||
|
|
||||||
|
// Chunk into runs of the same color, so that we only call alpha_blend once per run.
|
||||||
|
let chunk_beg = off;
|
||||||
|
while {
|
||||||
|
off += 1;
|
||||||
|
off < end && dst[off] == color
|
||||||
|
} {}
|
||||||
|
let chunk_end = off;
|
||||||
|
|
||||||
|
let color = Self::mix(color, src, 1.0, 1.0);
|
||||||
|
dst[chunk_beg..chunk_end].fill(color);
|
||||||
|
|
||||||
|
off < end
|
||||||
|
} {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn blend_rect_auto(&mut self, rect: Rect) {
|
||||||
|
let width = self.size.width;
|
||||||
|
let height = self.size.height;
|
||||||
|
let left = rect.left.clamp(0, width);
|
||||||
|
let right = rect.right.clamp(0, width);
|
||||||
|
let top = rect.top.clamp(0, height);
|
||||||
|
let bottom = rect.bottom.clamp(0, height);
|
||||||
|
|
||||||
|
if left >= right || top >= bottom {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
for y in top..bottom {
|
||||||
|
let beg = (y * width + left) as usize;
|
||||||
|
let end = (y * width + right) as usize;
|
||||||
|
let mut off = beg;
|
||||||
|
|
||||||
|
while {
|
||||||
|
let bg = self.bg_bitmap[off];
|
||||||
|
|
||||||
|
// Chunk into runs of the same color, so that we only call Self::quick_is_dark once per run.
|
||||||
|
let chunk_beg = off;
|
||||||
|
while {
|
||||||
|
off += 1;
|
||||||
|
off < end && self.bg_bitmap[off] == bg
|
||||||
|
} {}
|
||||||
|
let chunk_end = off;
|
||||||
|
|
||||||
|
let fg = self.auto_colors[Self::quick_is_dark(bg) as usize];
|
||||||
|
self.fg_bitmap[chunk_beg..chunk_end].fill(fg);
|
||||||
|
|
||||||
|
off < end
|
||||||
|
} {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn mix(dst: u32, src: u32, dst_balance: f32, src_balance: f32) -> u32 {
|
||||||
|
let src_r = Self::srgb_to_linear(src & 0xff);
|
||||||
|
let src_g = Self::srgb_to_linear((src >> 8) & 0xff);
|
||||||
|
let src_b = Self::srgb_to_linear((src >> 16) & 0xff);
|
||||||
|
let src_a = (src >> 24) as f32 / 255.0f32;
|
||||||
|
let src_a = src_a * dst_balance;
|
||||||
|
|
||||||
|
let dst_r = Self::srgb_to_linear(dst & 0xff);
|
||||||
|
let dst_g = Self::srgb_to_linear((dst >> 8) & 0xff);
|
||||||
|
let dst_b = Self::srgb_to_linear((dst >> 16) & 0xff);
|
||||||
|
let dst_a = (dst >> 24) as f32 / 255.0f32;
|
||||||
|
let dst_a = dst_a * src_balance;
|
||||||
|
|
||||||
|
let out_a = src_a + dst_a * (1.0f32 - src_a);
|
||||||
|
let out_r = (src_r * src_a + dst_r * dst_a * (1.0f32 - src_a)) / out_a;
|
||||||
|
let out_g = (src_g * src_a + dst_g * dst_a * (1.0f32 - src_a)) / out_a;
|
||||||
|
let out_b = (src_b * src_a + dst_b * dst_a * (1.0f32 - src_a)) / out_a;
|
||||||
|
|
||||||
|
(((out_a * 255.0f32) as u32) << 24)
|
||||||
|
| (Self::linear_to_srgb(out_b) << 16)
|
||||||
|
| (Self::linear_to_srgb(out_g) << 8)
|
||||||
|
| Self::linear_to_srgb(out_r)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Converts an 8-bit sRGB channel value (`0..=255`) to a linear-light value.
///
/// Small values use the linear segment of the sRGB transfer function,
/// everything else the 2.4 power curve.
fn srgb_to_linear(c: u32) -> f32 {
    let normalized = c as f32 / 255.0f32;
    if normalized > 0.04045f32 {
        ((normalized + 0.055f32) / 1.055f32).powf(2.4f32)
    } else {
        normalized / 12.92f32
    }
}
|
||||||
|
|
||||||
|
/// Converts a linear-light channel value to an 8-bit sRGB channel value.
///
/// The scaled result is truncated (not rounded) by the float-to-integer cast,
/// which also saturates out-of-range values and maps NaN to 0.
fn linear_to_srgb(c: f32) -> u32 {
    let encoded = if c <= 0.0031308f32 {
        c * 12.92f32
    } else {
        1.055f32 * c.powf(1.0f32 / 2.4f32) - 0.055f32
    };
    (encoded * 255.0f32) as u32
}
|
||||||
|
|
||||||
|
/// Cheaply decides whether the color `c` counts as dark.
///
/// Red, green and blue are read from bits 0-7, 8-15 and 16-23 of `c`.
fn quick_is_dark(c: u32) -> bool {
    // The little-endian byte order matches the packing: [r, g, b, <bits 24-31>].
    let [r, g, b, _] = c.to_le_bytes().map(u32::from);
    // Rough approximation of the sRGB luminance Y = 0.2126 R + 0.7152 G + 0.0722 B,
    // using the integer weights 3/10/1 (which sum to 14).
    let luminance = r * 3 + g * 10 + b;
    luminance < 128 * 14
}
|
||||||
|
|
||||||
|
/// Stores the cursor position and whether the cursor is in overtype mode.
pub fn set_cursor(&mut self, pos: Point, overtype: bool) {
    (self.cursor, self.cursor_overtype) = (pos, overtype);
}
|
||||||
|
|
||||||
|
pub fn render(&mut self) -> String {
|
||||||
|
let mut result = String::new();
|
||||||
|
result.push_str("\x1b[H");
|
||||||
|
|
||||||
|
let mut last_bg = self.bg_bitmap[0];
|
||||||
|
let mut last_fg = self.fg_bitmap[0];
|
||||||
|
// Invert the colors to force a color change on the first cell.
|
||||||
|
last_bg ^= 1;
|
||||||
|
last_fg ^= 1;
|
||||||
|
|
||||||
|
for y in 0..self.size.height {
|
||||||
|
if y != 0 {
|
||||||
|
result.push_str("\r\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
let line = &self.lines[y as usize][..];
|
||||||
|
let line_bytes = line.as_bytes();
|
||||||
|
let mut cfg = ucd::MeasurementConfig::new(&line_bytes);
|
||||||
|
|
||||||
|
for x in 0..self.size.width {
|
||||||
|
let bg = self.bg_bitmap[(y * self.size.width + x) as usize];
|
||||||
|
let fg = self.fg_bitmap[(y * self.size.width + x) as usize];
|
||||||
|
if bg == last_bg && fg == last_fg {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if x != 0 {
|
||||||
|
let beg = cfg.cursor().offset;
|
||||||
|
let end = cfg.goto_visual(Point { x, y: 0 }).offset;
|
||||||
|
result.push_str(&line[beg..end]);
|
||||||
|
}
|
||||||
|
|
||||||
|
if last_bg != bg {
|
||||||
|
last_bg = bg;
|
||||||
|
_ = write!(
|
||||||
|
result,
|
||||||
|
"\x1b[48;2;{};{};{}m",
|
||||||
|
bg & 0xff,
|
||||||
|
(bg >> 8) & 0xff,
|
||||||
|
(bg >> 16) & 0xff
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
if last_fg != fg {
|
||||||
|
last_fg = fg;
|
||||||
|
_ = write!(
|
||||||
|
result,
|
||||||
|
"\x1b[38;2;{};{};{}m",
|
||||||
|
fg & 0xff,
|
||||||
|
(fg >> 8) & 0xff,
|
||||||
|
(fg >> 16) & 0xff
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
result.push_str(&line[cfg.cursor().offset..]);
|
||||||
|
}
|
||||||
|
|
||||||
|
if self.cursor.x >= 0 && self.cursor.y >= 0 {
|
||||||
|
// CUP to the cursor position.
|
||||||
|
// DECSCUSR to set the cursor style.
|
||||||
|
// DECTCEM to show the cursor.
|
||||||
|
_ = write!(
|
||||||
|
result,
|
||||||
|
"\x1b[{};{}H\x1b[{} q\x1b[?25h",
|
||||||
|
self.cursor.y + 1,
|
||||||
|
self.cursor.x + 1,
|
||||||
|
if self.cursor_overtype { 1 } else { 5 }
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
// DECTCEM to hide the cursor.
|
||||||
|
result.push_str("\x1b[?25l");
|
||||||
|
}
|
||||||
|
|
||||||
|
result
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn mix(dst: u32, src: u32, balance: f32) -> u32 {
|
||||||
|
Framebuffer::mix(dst, src, 1.0 - balance, balance)
|
||||||
|
}
|
234
src/fuzzy.rs
Normal file
234
src/fuzzy.rs
Normal file
|
@ -0,0 +1,234 @@
|
||||||
|
//! Fuzzy search algorithm based on the one used in VS Code (`/src/vs/base/common/fuzzyScorer.ts`).
|
||||||
|
//! Other algorithms exist, such as Sublime Text's, or the one used in `fzf`,
|
||||||
|
//! but I figured that this one is what lots of people may be familiar with.
|
||||||
|
|
||||||
|
use crate::icu;
|
||||||
|
|
||||||
|
pub type FuzzyScore = (i32, Vec<usize>);
|
||||||
|
|
||||||
|
const NO_MATCH: i32 = 0;
|
||||||
|
const NO_SCORE: FuzzyScore = (NO_MATCH, Vec::new());
|
||||||
|
|
||||||
|
pub fn score_fuzzy(target: &str, query: &str, allow_non_contiguous_matches: bool) -> FuzzyScore {
|
||||||
|
if target.is_empty() || query.is_empty() {
|
||||||
|
return NO_SCORE; // return early if target or query are empty
|
||||||
|
}
|
||||||
|
|
||||||
|
let target_lower = icu::fold_case(target);
|
||||||
|
let query_lower = icu::fold_case(query);
|
||||||
|
let target: Vec<char> = target.chars().collect();
|
||||||
|
let target_lower: Vec<char> = target_lower.chars().collect();
|
||||||
|
let query: Vec<char> = query.chars().collect();
|
||||||
|
let query_lower: Vec<char> = query_lower.chars().collect();
|
||||||
|
|
||||||
|
if target.len() < query.len() {
|
||||||
|
return NO_SCORE; // impossible for query to be contained in target
|
||||||
|
}
|
||||||
|
|
||||||
|
do_score_fuzzy(
|
||||||
|
&query,
|
||||||
|
&query_lower,
|
||||||
|
&target,
|
||||||
|
&target_lower,
|
||||||
|
allow_non_contiguous_matches,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn do_score_fuzzy(
|
||||||
|
query: &[char],
|
||||||
|
query_lower: &[char],
|
||||||
|
target: &[char],
|
||||||
|
target_lower: &[char],
|
||||||
|
allow_non_contiguous_matches: bool,
|
||||||
|
) -> FuzzyScore {
|
||||||
|
let mut scores = vec![0; query.len() * target.len()];
|
||||||
|
let mut matches = vec![0; query.len() * target.len()];
|
||||||
|
|
||||||
|
//
|
||||||
|
// Build Scorer Matrix:
|
||||||
|
//
|
||||||
|
// The matrix is composed of query q and target t. For each index we score
|
||||||
|
// q[i] with t[i] and compare that with the previous score. If the score is
|
||||||
|
// equal or larger, we keep the match. In addition to the score, we also keep
|
||||||
|
// the length of the consecutive matches to use as boost for the score.
|
||||||
|
//
|
||||||
|
// t a r g e t
|
||||||
|
// q
|
||||||
|
// u
|
||||||
|
// e
|
||||||
|
// r
|
||||||
|
// y
|
||||||
|
//
|
||||||
|
for query_index in 0..query.len() {
|
||||||
|
let query_index_offset = query_index * target.len();
|
||||||
|
let query_index_previous_offset = if query_index > 0 {
|
||||||
|
(query_index - 1) * target.len()
|
||||||
|
} else {
|
||||||
|
0
|
||||||
|
};
|
||||||
|
|
||||||
|
for target_index in 0..target.len() {
|
||||||
|
let current_index = query_index_offset + target_index;
|
||||||
|
let diag_index = if query_index > 0 && target_index > 0 {
|
||||||
|
query_index_previous_offset + target_index - 1
|
||||||
|
} else {
|
||||||
|
0
|
||||||
|
};
|
||||||
|
let left_score = if target_index > 0 {
|
||||||
|
scores[current_index - 1]
|
||||||
|
} else {
|
||||||
|
0
|
||||||
|
};
|
||||||
|
let diag_score = if query_index > 0 && target_index > 0 {
|
||||||
|
scores[diag_index]
|
||||||
|
} else {
|
||||||
|
0
|
||||||
|
};
|
||||||
|
let matches_sequence_len = if query_index > 0 && target_index > 0 {
|
||||||
|
matches[diag_index]
|
||||||
|
} else {
|
||||||
|
0
|
||||||
|
};
|
||||||
|
|
||||||
|
// If we are not matching on the first query character any more, we only produce a
|
||||||
|
// score if we had a score previously for the last query index (by looking at the diagScore).
|
||||||
|
// This makes sure that the query always matches in sequence on the target. For example
|
||||||
|
// given a target of "ede" and a query of "de", we would otherwise produce a wrong high score
|
||||||
|
// for query[1] ("e") matching on target[0] ("e") because of the "beginning of word" boost.
|
||||||
|
let score = if diag_score == 0 && query_index != 0 {
|
||||||
|
0
|
||||||
|
} else {
|
||||||
|
compute_char_score(
|
||||||
|
query[query_index],
|
||||||
|
query_lower[query_index],
|
||||||
|
if target_index != 0 {
|
||||||
|
Some(target[target_index - 1])
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
},
|
||||||
|
target[target_index],
|
||||||
|
target_lower[target_index],
|
||||||
|
matches_sequence_len,
|
||||||
|
)
|
||||||
|
};
|
||||||
|
|
||||||
|
// We have a score and its equal or larger than the left score
|
||||||
|
// Match: sequence continues growing from previous diag value
|
||||||
|
// Score: increases by diag score value
|
||||||
|
let is_valid_score = score != 0 && diag_score + score >= left_score;
|
||||||
|
if is_valid_score
|
||||||
|
&& (
|
||||||
|
// We don't need to check if it's contiguous if we allow non-contiguous matches
|
||||||
|
allow_non_contiguous_matches ||
|
||||||
|
// We must be looking for a contiguous match.
|
||||||
|
// Looking at an index higher than 0 in the query means we must have already
|
||||||
|
// found out this is contiguous otherwise there wouldn't have been a score
|
||||||
|
query_index > 0 ||
|
||||||
|
// lastly check if the query is completely contiguous at this index in the target
|
||||||
|
target_lower[target_index..].starts_with(&query_lower)
|
||||||
|
)
|
||||||
|
{
|
||||||
|
matches[current_index] = matches_sequence_len + 1;
|
||||||
|
scores[current_index] = diag_score + score;
|
||||||
|
} else {
|
||||||
|
// We either have no score or the score is lower than the left score
|
||||||
|
// Match: reset to 0
|
||||||
|
// Score: pick up from left hand side
|
||||||
|
matches[current_index] = NO_MATCH;
|
||||||
|
scores[current_index] = left_score;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Restore Positions (starting from bottom right of matrix)
|
||||||
|
let mut positions = Vec::new();
|
||||||
|
|
||||||
|
if query.len() != 0 && target.len() != 0 {
|
||||||
|
let mut query_index = query.len() - 1;
|
||||||
|
let mut target_index = target.len() - 1;
|
||||||
|
|
||||||
|
loop {
|
||||||
|
let current_index = query_index * target.len() + target_index;
|
||||||
|
if matches[current_index] == NO_MATCH {
|
||||||
|
if target_index == 0 {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
target_index -= 1; // go left
|
||||||
|
} else {
|
||||||
|
positions.push(target_index);
|
||||||
|
|
||||||
|
// go up and left
|
||||||
|
if query_index == 0 || target_index == 0 {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
query_index -= 1;
|
||||||
|
target_index -= 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
positions.reverse();
|
||||||
|
}
|
||||||
|
|
||||||
|
(scores[query.len() * target.len() - 1], positions)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn compute_char_score(
|
||||||
|
query: char,
|
||||||
|
query_lower: char,
|
||||||
|
target_prev: Option<char>,
|
||||||
|
target_curr: char,
|
||||||
|
target_curr_lower: char,
|
||||||
|
matches_sequence_len: i32,
|
||||||
|
) -> i32 {
|
||||||
|
let mut score = 0;
|
||||||
|
|
||||||
|
if !consider_as_equal(query_lower, target_curr_lower) {
|
||||||
|
return score; // no match of characters
|
||||||
|
}
|
||||||
|
|
||||||
|
// Character match bonus
|
||||||
|
score += 1;
|
||||||
|
|
||||||
|
// Consecutive match bonus
|
||||||
|
if matches_sequence_len > 0 {
|
||||||
|
score += matches_sequence_len * 5;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Same case bonus
|
||||||
|
if query == target_curr {
|
||||||
|
score += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(target_prev) = target_prev {
|
||||||
|
// After separator bonus
|
||||||
|
let separator_bonus = score_separator_at_pos(target_prev);
|
||||||
|
if separator_bonus > 0 {
|
||||||
|
score += separator_bonus;
|
||||||
|
}
|
||||||
|
// Inside word upper case bonus (camel case). We only give this bonus if we're not in a contiguous sequence.
|
||||||
|
// For example:
|
||||||
|
// NPE => NullPointerException = boost
|
||||||
|
// HTTP => HTTP = not boost
|
||||||
|
else if target_curr != target_curr_lower && matches_sequence_len == 0 {
|
||||||
|
score += 2;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Start of word bonus
|
||||||
|
score += 8;
|
||||||
|
}
|
||||||
|
|
||||||
|
score
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns whether the characters `a` and `b` count as equal for fuzzy matching.
///
/// Besides identical characters, the two path separators `/` and `\` are treated
/// as equal to each other, to ignore platform differences.
fn consider_as_equal(a: char, b: char) -> bool {
    // `&&` binds tighter than `||`, so both separator checks need explicit grouping.
    // Without it, a lone '/' in `a` or a lone '\\' in `b` would match any character.
    a == b || (matches!(a, '/' | '\\') && matches!(b, '/' | '\\'))
}
|
||||||
|
|
||||||
|
/// Returns the bonus awarded to a match that directly follows the character `ch`:
/// 5 after a path separator, 4 after another separator, and 0 otherwise.
fn score_separator_at_pos(ch: char) -> i32 {
    // Path separators are preferred...
    const PATH_SEPARATORS: [char; 2] = ['/', '\\'];
    // ...over other separators.
    const OTHER_SEPARATORS: [char; 7] = ['_', '-', '.', ' ', '\'', '"', ':'];

    if PATH_SEPARATORS.contains(&ch) {
        5
    } else if OTHER_SEPARATORS.contains(&ch) {
        4
    } else {
        0
    }
}
|
382
src/helpers.rs
Normal file
382
src/helpers.rs
Normal file
|
@ -0,0 +1,382 @@
|
||||||
|
use std::borrow::Cow;
|
||||||
|
use std::cmp::Ordering;
|
||||||
|
use std::ffi::{CStr, CString, OsStr, OsString, c_char};
|
||||||
|
use std::mem;
|
||||||
|
use std::path::{Path, PathBuf};
|
||||||
|
use std::slice;
|
||||||
|
use std::str;
|
||||||
|
|
||||||
|
pub type CoordType = i32;
|
||||||
|
|
||||||
|
pub const COORD_TYPE_MIN: CoordType = -2147483647 - 1;
|
||||||
|
pub const COORD_TYPE_MAX: CoordType = 2147483647;
|
||||||
|
pub const COORD_TYPE_SAFE_MIN: CoordType = -32767 - 1;
|
||||||
|
pub const COORD_TYPE_SAFE_MAX: CoordType = 32767;
|
||||||
|
|
||||||
|
#[derive(Clone, Copy, PartialEq, Eq, Default, Debug)]
|
||||||
|
pub struct Point {
|
||||||
|
pub x: CoordType,
|
||||||
|
pub y: CoordType,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Point {
|
||||||
|
pub const MIN: Point = Point {
|
||||||
|
x: COORD_TYPE_MIN,
|
||||||
|
y: COORD_TYPE_MIN,
|
||||||
|
};
|
||||||
|
pub const MAX: Point = Point {
|
||||||
|
x: COORD_TYPE_MAX,
|
||||||
|
y: COORD_TYPE_MAX,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PartialOrd<Point> for Point {
|
||||||
|
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
||||||
|
Some(self.cmp(other))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Ord for Point {
|
||||||
|
fn cmp(&self, other: &Self) -> Ordering {
|
||||||
|
match self.y.cmp(&other.y) {
|
||||||
|
Ordering::Equal => self.x.cmp(&other.x),
|
||||||
|
ord => ord,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Copy, PartialEq, Eq, Default, Debug)]
|
||||||
|
pub struct Size {
|
||||||
|
pub width: CoordType,
|
||||||
|
pub height: CoordType,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Size {
|
||||||
|
pub fn as_rect(&self) -> Rect {
|
||||||
|
Rect {
|
||||||
|
left: 0,
|
||||||
|
top: 0,
|
||||||
|
right: self.width,
|
||||||
|
bottom: self.height,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Copy, PartialEq, Eq, Default, Debug)]
|
||||||
|
pub struct Rect {
|
||||||
|
pub left: CoordType,
|
||||||
|
pub top: CoordType,
|
||||||
|
pub right: CoordType,
|
||||||
|
pub bottom: CoordType,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Rect {
|
||||||
|
pub fn one(value: CoordType) -> Self {
|
||||||
|
Self {
|
||||||
|
left: value,
|
||||||
|
top: value,
|
||||||
|
right: value,
|
||||||
|
bottom: value,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn two(top_bottom: CoordType, left_right: CoordType) -> Self {
|
||||||
|
Self {
|
||||||
|
left: left_right,
|
||||||
|
top: top_bottom,
|
||||||
|
right: left_right,
|
||||||
|
bottom: top_bottom,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn three(top: CoordType, left_right: CoordType, bottom: CoordType) -> Self {
|
||||||
|
Self {
|
||||||
|
left: left_right,
|
||||||
|
top,
|
||||||
|
right: left_right,
|
||||||
|
bottom,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn is_empty(&self) -> bool {
|
||||||
|
self.left >= self.right || self.top >= self.bottom
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn width(&self) -> CoordType {
|
||||||
|
self.right - self.left
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn height(&self) -> CoordType {
|
||||||
|
self.bottom - self.top
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn contains(&self, point: Point) -> bool {
|
||||||
|
point.x >= self.left && point.x < self.right && point.y >= self.top && point.y < self.bottom
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn intersect(&self, rhs: Self) -> Self {
|
||||||
|
let l = self.left.max(rhs.left);
|
||||||
|
let t = self.top.max(rhs.top);
|
||||||
|
let r = self.right.min(rhs.right);
|
||||||
|
let b = self.bottom.min(rhs.bottom);
|
||||||
|
|
||||||
|
// Ensure that the size is non-negative. This avoids bugs,
|
||||||
|
// because some height/width is negative all of a sudden.
|
||||||
|
let r = l.max(r);
|
||||||
|
let b = t.max(b);
|
||||||
|
|
||||||
|
Rect {
|
||||||
|
left: l,
|
||||||
|
top: t,
|
||||||
|
right: r,
|
||||||
|
bottom: b,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Packs the first, middle (`k >> 1`) and last (`k - 1`) byte at `p` into bits
/// 16-23, 8-15 and 0-7 of the result.
///
/// # Safety
///
/// `p` must be valid for reads of `k` bytes and `k` must not be 0.
unsafe fn wyr3(p: *const u8, k: usize) -> u64 {
    let bytes = unsafe { [p.read(), p.add(k >> 1).read(), p.add(k - 1).read()] };
    let [first, middle, last] = bytes.map(u64::from);
    (first << 16) | (middle << 8) | last
}
|
||||||
|
|
||||||
|
/// Reads 4 bytes at `p` as a native-endian `u32`, widened to `u64`.
///
/// # Safety
///
/// `p` must be valid for reads of 4 bytes. No alignment is required.
unsafe fn wyr4(p: *const u8) -> u64 {
    let word: u32 = unsafe { p.cast::<u32>().read_unaligned() };
    u64::from(word)
}
|
||||||
|
|
||||||
|
/// Reads 8 bytes at `p` as a native-endian `u64`.
///
/// # Safety
///
/// `p` must be valid for reads of 8 bytes. No alignment is required.
unsafe fn wyr8(p: *const u8) -> u64 {
    let wide = p.cast::<u64>();
    unsafe { wide.read_unaligned() }
}
|
||||||
|
|
||||||
|
// Multiplies `lhs` and `rhs` into a 128-bit product and XORs its two 64-bit halves.
//
// On its own this is a weak mix function, so it may be worth replacing external
// uses of it with a stronger one. On the other hand, it's very fast.
pub fn wymix(lhs: u64, rhs: u64) -> u64 {
    let product = u128::from(lhs) * u128::from(rhs);
    let high = (product >> 64) as u64;
    let low = product as u64;
    high ^ low
}
|
||||||
|
|
||||||
|
// The venerable wyhash hash function. It's fast and has good statistical properties.
|
||||||
|
// It's in the public domain.
|
||||||
|
pub fn hash(mut seed: u64, data: &[u8]) -> u64 {
|
||||||
|
unsafe {
|
||||||
|
const S0: u64 = 0xa0761d6478bd642f;
|
||||||
|
const S1: u64 = 0xe7037ed1a0b428db;
|
||||||
|
const S2: u64 = 0x8ebc6af09c88c6e3;
|
||||||
|
const S3: u64 = 0x589965cc75374cc3;
|
||||||
|
|
||||||
|
let len = data.len();
|
||||||
|
let mut p = data.as_ptr();
|
||||||
|
let a;
|
||||||
|
let b;
|
||||||
|
|
||||||
|
seed ^= S0;
|
||||||
|
|
||||||
|
if len <= 16 {
|
||||||
|
if len >= 4 {
|
||||||
|
a = (wyr4(p) << 32) | wyr4(p.add((len >> 3) << 2));
|
||||||
|
b = (wyr4(p.add(len - 4)) << 32) | wyr4(p.add(len - 4 - ((len >> 3) << 2)));
|
||||||
|
} else if len > 0 {
|
||||||
|
a = wyr3(p, len);
|
||||||
|
b = 0;
|
||||||
|
} else {
|
||||||
|
a = 0;
|
||||||
|
b = 0;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
let mut i = len;
|
||||||
|
if i > 48 {
|
||||||
|
let mut seed1 = seed;
|
||||||
|
let mut seed2 = seed;
|
||||||
|
while {
|
||||||
|
seed = wymix(wyr8(p) ^ S1, wyr8(p.add(8)) ^ seed);
|
||||||
|
seed1 = wymix(wyr8(p.add(16)) ^ S2, wyr8(p.add(24)) ^ seed1);
|
||||||
|
seed2 = wymix(wyr8(p.add(32)) ^ S3, wyr8(p.add(40)) ^ seed2);
|
||||||
|
p = p.add(48);
|
||||||
|
i -= 48;
|
||||||
|
i > 48
|
||||||
|
} {}
|
||||||
|
seed ^= seed1 ^ seed2;
|
||||||
|
}
|
||||||
|
while i > 16 {
|
||||||
|
seed = wymix(wyr8(p) ^ S1, wyr8(p.add(8)) ^ seed);
|
||||||
|
i -= 16;
|
||||||
|
p = p.add(16);
|
||||||
|
}
|
||||||
|
a = wyr8(p.offset(i as isize - 16));
|
||||||
|
b = wyr8(p.offset(i as isize - 8));
|
||||||
|
}
|
||||||
|
|
||||||
|
wymix(S1 ^ (len as u64), wymix(a ^ S1, b ^ seed))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn hash_str(seed: u64, s: &str) -> u64 {
|
||||||
|
hash(seed, s.as_bytes())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Appends `total_copies` copies of the character `ch` to `dst`.
pub fn string_append_repeat(dst: &mut String, ch: char, total_copies: usize) {
    if total_copies == 0 {
        return;
    }

    // Only whole UTF-8 encodings of `ch` get appended below,
    // so `dst` holds valid UTF-8 again once this function returns.
    let bytes = unsafe { dst.as_mut_vec() };

    if !ch.is_ascii() {
        let mut scratch = [0; 4];
        let encoded = ch.encode_utf8(&mut scratch).as_bytes();
        let start = bytes.len();
        let grow_by = encoded.len() * total_copies;
        let target_len = start + grow_by;

        bytes.reserve(grow_by);
        bytes.extend_from_slice(encoded);

        // Keep duplicating what has been appended so far. Each round copies as much
        // as is already there, capped at what is still missing.
        while bytes.len() != target_len {
            let missing = target_len - bytes.len();
            let copy_end = (start + missing).min(bytes.len());
            bytes.extend_from_within(start..copy_end);
        }
        return;
    }

    // A single-byte character. This compiles down to `memset()`.
    bytes.extend(std::iter::repeat_n(ch as u8, total_copies));
}
|
||||||
|
|
||||||
|
/// Returns the two values in ascending order, as `[min, max]`.
/// If they compare equal, `v1` comes first.
///
/// (`std::cmp::minmax` is unstable, as per usual.)
pub fn minmax<T>(v1: T, v2: T) -> [T; 2]
where
    T: Ord,
{
    match v2.cmp(&v1) {
        Ordering::Less => [v2, v1],
        Ordering::Equal | Ordering::Greater => [v1, v2],
    }
}
|
||||||
|
|
||||||
|
/// A `PathBuf` bundled with its lossily converted string form, which is
/// computed once at construction time.
pub struct DisplayablePathBuf {
    value: PathBuf,
    // Possibly borrows from `value`; see `new`.
    str: Cow<'static, str>,
}

impl DisplayablePathBuf {
    /// Wraps `value` and precomputes its `to_string_lossy()` form.
    pub fn new(value: PathBuf) -> Self {
        let lossy: Cow<'_, str> = value.to_string_lossy();
        // A borrowed `lossy` points into the buffer owned by `value`. The lifetime
        // is erased here so that both can be stored side by side in one struct.
        let str: Cow<'static, str> = unsafe { mem::transmute(lossy) };
        Self { value, str }
    }

    /// Returns the wrapped path.
    pub fn as_path(&self) -> &Path {
        self.value.as_path()
    }

    /// Returns the lossily converted string form of the path.
    pub fn as_str(&self) -> &str {
        self.str.as_ref()
    }

    /// Returns the encoded bytes of the path's underlying `OsStr`.
    pub fn as_bytes(&self) -> &[u8] {
        let os_str = self.value.as_os_str();
        os_str.as_encoded_bytes()
    }

    /// Returns a copy of the wrapped `PathBuf`.
    pub fn clone_path_buf(&self) -> PathBuf {
        self.value.clone()
    }

    /// Consumes `self` and returns the wrapped `PathBuf`.
    pub fn take(self) -> PathBuf {
        self.value
    }
}

impl Default for DisplayablePathBuf {
    /// Creates an empty path with an empty string form.
    fn default() -> Self {
        let value = PathBuf::default();
        let str = Cow::Borrowed("");
        Self { value, str }
    }
}

impl Clone for DisplayablePathBuf {
    /// Clones the path and recomputes the string form for the copy.
    fn clone(&self) -> Self {
        Self::new(self.clone_path_buf())
    }
}

impl From<OsString> for DisplayablePathBuf {
    fn from(s: OsString) -> DisplayablePathBuf {
        let path = PathBuf::from(s);
        Self::new(path)
    }
}

impl<T: ?Sized + AsRef<OsStr>> From<&T> for DisplayablePathBuf {
    fn from(s: &T) -> DisplayablePathBuf {
        let path = PathBuf::from(s);
        Self::new(path)
    }
}
|
||||||
|
|
||||||
|
/// A `CString` bundled with its lossily converted string form, which is
/// computed once at construction time.
pub struct DisplayableCString {
    value: CString,
    // Possibly borrows from `value`; see `new`.
    str: Cow<'static, str>,
}

impl DisplayableCString {
    /// Wraps `value` and precomputes its `to_string_lossy()` form.
    pub fn new(value: CString) -> Self {
        let lossy: Cow<'_, str> = value.to_string_lossy();
        // A borrowed `lossy` points into the buffer owned by `value`. The lifetime
        // is erased here so that both can be stored side by side in one struct.
        let str: Cow<'static, str> = unsafe { mem::transmute(lossy) };
        Self { value, str }
    }

    /// Copies the C string at `ptr` into a new `DisplayableCString`.
    ///
    /// # Safety
    ///
    /// `ptr` must satisfy the requirements of `CStr::from_ptr`.
    pub unsafe fn from_ptr(ptr: *const c_char) -> Self {
        let borrowed = unsafe { CStr::from_ptr(ptr) };
        Self::new(borrowed.to_owned())
    }

    /// Returns the wrapped C string.
    pub fn as_cstr(&self) -> &CStr {
        self.value.as_c_str()
    }

    /// Returns the lossily converted string form.
    pub fn as_str(&self) -> &str {
        self.str.as_ref()
    }
}
|
||||||
|
|
||||||
|
/// Creates a `&str` from a pointer and a length in bytes, without any checks.
///
/// # Safety
///
/// `ptr` and `len` must satisfy the requirements of `slice::from_raw_parts`,
/// and the bytes must be valid UTF-8.
#[inline]
#[must_use]
pub const unsafe fn str_from_raw_parts<'a>(ptr: *const u8, len: usize) -> &'a str {
    unsafe {
        let bytes = slice::from_raw_parts(ptr, len);
        str::from_utf8_unchecked(bytes)
    }
}
|
||||||
|
|
||||||
|
/// Creates a `&mut str` from a pointer and a length in bytes, without any checks.
///
/// # Safety
///
/// `ptr` and `len` must satisfy the requirements of `slice::from_raw_parts_mut`,
/// and the bytes must be valid UTF-8.
#[inline]
#[must_use]
pub const unsafe fn str_from_raw_parts_mut<'a>(ptr: *mut u8, len: usize) -> &'a mut str {
    unsafe {
        let bytes = slice::from_raw_parts_mut(ptr, len);
        str::from_utf8_unchecked_mut(bytes)
    }
}
|
||||||
|
|
||||||
|
/// Inserts all elements of `src` into `dst` at index `off`, shifting the
/// elements at and after `off` towards the end.
///
/// An `off` past the end of `dst` is clamped, which turns the call into an append.
pub fn vec_insert_at<T: Copy>(dst: &mut Vec<T>, off: usize, src: &[T]) {
    let old_len = dst.len();
    let count = src.len();
    let off = off.min(old_len);
    let tail_len = old_len - off;

    // Make room for the new elements. NOTE that this must happen before
    // `as_mut_ptr` is called, or else we risk accessing a stale pointer.
    dst.reserve(count);

    unsafe {
        let gap = dst.as_mut_ptr().add(off);

        // Move the tail of the vector out of the way. Source and destination
        // may overlap here, hence `copy` and not `copy_nonoverlapping`.
        if tail_len > 0 {
            std::ptr::copy(gap, gap.add(count), tail_len);
        }

        // Copy the new elements into the gap and account for them in the length.
        std::ptr::copy_nonoverlapping(src.as_ptr(), gap, count);
        dst.set_len(old_len + count);
    }
}
|
||||||
|
|
||||||
|
// A stable stand-in for `std::hint::cold_path`: an empty function marked `#[cold]`.
// Call it at the start of a branch that is expected to be taken rarely.
#[inline(always)]
#[cold]
pub const fn cold_path() {}
|
1027
src/icu.rs
Normal file
1027
src/icu.rs
Normal file
File diff suppressed because it is too large
Load diff
488
src/input.rs
Normal file
488
src/input.rs
Normal file
|
@ -0,0 +1,488 @@
|
||||||
|
use crate::helpers::{Point, Size};
|
||||||
|
use crate::vt;
|
||||||
|
|
||||||
|
// TODO: Is this a good idea? I did it to allow typing `kbmod::CTRL | vk::A`.
|
||||||
|
// The reason it's an awkward u32 and not a struct is to hopefully make ABIs easier later.
|
||||||
|
// Of course you could just translate on the ABI boundary, but my hope is that this
|
||||||
|
// design lets me realize some restrictions early on that I can't foresee yet.
|
||||||
|
#[repr(transparent)]
|
||||||
|
#[derive(Clone, Copy, PartialEq, Eq)]
|
||||||
|
pub struct InputKey(u32);
|
||||||
|
|
||||||
|
impl InputKey {
|
||||||
|
pub const fn new(v: u32) -> Self {
|
||||||
|
Self(v)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub const fn value(&self) -> u32 {
|
||||||
|
self.0
|
||||||
|
}
|
||||||
|
|
||||||
|
pub const fn key(&self) -> InputKey {
|
||||||
|
InputKey(self.0 & 0x00FFFFFF)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub const fn modifiers(&self) -> InputKeyMod {
|
||||||
|
InputKeyMod(self.0 & 0xFF000000)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub const fn modifiers_contains(&self, modifier: InputKeyMod) -> bool {
|
||||||
|
(self.0 & modifier.0) != 0
|
||||||
|
}
|
||||||
|
|
||||||
|
pub const fn with_modifiers(&self, modifiers: InputKeyMod) -> InputKey {
|
||||||
|
InputKey(self.0 | modifiers.0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/// A set of keyboard modifier bits, stored in a `u32`.
#[repr(transparent)]
#[derive(Clone, Copy, PartialEq, Eq)]
pub struct InputKeyMod(u32);

impl InputKeyMod {
    /// Wraps the raw value `v`.
    const fn new(v: u32) -> Self {
        InputKeyMod(v)
    }

    /// Returns `true` if this set shares at least one set bit with `modifier`.
    pub const fn contains(&self, modifier: InputKeyMod) -> bool {
        let shared = self.0 & modifier.0;
        shared != 0
    }
}
|
||||||
|
|
||||||
|
/// Enables `key | modifiers`, yielding the key with the modifiers applied.
impl std::ops::BitOr<InputKeyMod> for InputKey {
    type Output = InputKey;

    fn bitor(self, rhs: InputKeyMod) -> Self::Output {
        self.with_modifiers(rhs)
    }
}
|
||||||
|
|
||||||
|
/// Enables `modifiers | key`, yielding the key with the modifiers applied.
impl std::ops::BitOr<InputKey> for InputKeyMod {
    type Output = InputKey;

    fn bitor(self, rhs: InputKey) -> Self::Output {
        // OR is commutative, so this is the mirror image of `key | modifiers`.
        rhs.with_modifiers(self)
    }
}
|
||||||
|
|
||||||
|
impl std::ops::BitOrAssign for InputKeyMod {
|
||||||
|
fn bitor_assign(&mut self, rhs: Self) {
|
||||||
|
self.0 |= rhs.0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// The codes defined here match the VK_* constants on Windows.
|
||||||
|
// It's a convenient way to handle keyboard input, even on other platforms.
|
||||||
|
pub mod vk {
|
||||||
|
use super::InputKey;
|
||||||
|
|
||||||
|
pub const NULL: InputKey = InputKey::new(0x00);
|
||||||
|
pub const BACK: InputKey = InputKey::new(0x08);
|
||||||
|
pub const TAB: InputKey = InputKey::new(0x09);
|
||||||
|
pub const RETURN: InputKey = InputKey::new(0x0D);
|
||||||
|
pub const ESCAPE: InputKey = InputKey::new(0x1B);
|
||||||
|
pub const SPACE: InputKey = InputKey::new(0x20);
|
||||||
|
pub const PRIOR: InputKey = InputKey::new(0x21);
|
||||||
|
pub const NEXT: InputKey = InputKey::new(0x22);
|
||||||
|
|
||||||
|
pub const END: InputKey = InputKey::new(0x23);
|
||||||
|
pub const HOME: InputKey = InputKey::new(0x24);
|
||||||
|
|
||||||
|
pub const LEFT: InputKey = InputKey::new(0x25);
|
||||||
|
pub const UP: InputKey = InputKey::new(0x26);
|
||||||
|
pub const RIGHT: InputKey = InputKey::new(0x27);
|
||||||
|
pub const DOWN: InputKey = InputKey::new(0x28);
|
||||||
|
|
||||||
|
pub const INSERT: InputKey = InputKey::new(0x2D);
|
||||||
|
pub const DELETE: InputKey = InputKey::new(0x2E);
|
||||||
|
|
||||||
|
pub const A: InputKey = InputKey::new('A' as u32);
|
||||||
|
pub const B: InputKey = InputKey::new('B' as u32);
|
||||||
|
pub const C: InputKey = InputKey::new('C' as u32);
|
||||||
|
pub const D: InputKey = InputKey::new('D' as u32);
|
||||||
|
pub const E: InputKey = InputKey::new('E' as u32);
|
||||||
|
pub const F: InputKey = InputKey::new('F' as u32);
|
||||||
|
pub const G: InputKey = InputKey::new('G' as u32);
|
||||||
|
pub const H: InputKey = InputKey::new('H' as u32);
|
||||||
|
pub const I: InputKey = InputKey::new('I' as u32);
|
||||||
|
pub const J: InputKey = InputKey::new('J' as u32);
|
||||||
|
pub const K: InputKey = InputKey::new('K' as u32);
|
||||||
|
pub const L: InputKey = InputKey::new('L' as u32);
|
||||||
|
pub const M: InputKey = InputKey::new('M' as u32);
|
||||||
|
pub const N: InputKey = InputKey::new('N' as u32);
|
||||||
|
pub const O: InputKey = InputKey::new('O' as u32);
|
||||||
|
pub const P: InputKey = InputKey::new('P' as u32);
|
||||||
|
pub const Q: InputKey = InputKey::new('Q' as u32);
|
||||||
|
pub const R: InputKey = InputKey::new('R' as u32);
|
||||||
|
pub const S: InputKey = InputKey::new('S' as u32);
|
||||||
|
pub const T: InputKey = InputKey::new('T' as u32);
|
||||||
|
pub const U: InputKey = InputKey::new('U' as u32);
|
||||||
|
pub const V: InputKey = InputKey::new('V' as u32);
|
||||||
|
pub const W: InputKey = InputKey::new('W' as u32);
|
||||||
|
pub const X: InputKey = InputKey::new('X' as u32);
|
||||||
|
pub const Y: InputKey = InputKey::new('Y' as u32);
|
||||||
|
pub const Z: InputKey = InputKey::new('Z' as u32);
|
||||||
|
|
||||||
|
pub const NUMPAD0: InputKey = InputKey::new(0x60);
|
||||||
|
pub const NUMPAD1: InputKey = InputKey::new(0x61);
|
||||||
|
pub const NUMPAD2: InputKey = InputKey::new(0x62);
|
||||||
|
pub const NUMPAD3: InputKey = InputKey::new(0x63);
|
||||||
|
pub const NUMPAD4: InputKey = InputKey::new(0x64);
|
||||||
|
pub const NUMPAD5: InputKey = InputKey::new(0x65);
|
||||||
|
pub const NUMPAD6: InputKey = InputKey::new(0x66);
|
||||||
|
pub const NUMPAD7: InputKey = InputKey::new(0x67);
|
||||||
|
pub const NUMPAD8: InputKey = InputKey::new(0x68);
|
||||||
|
pub const NUMPAD9: InputKey = InputKey::new(0x69);
|
||||||
|
pub const MULTIPLY: InputKey = InputKey::new(0x6A);
|
||||||
|
pub const ADD: InputKey = InputKey::new(0x6B);
|
||||||
|
pub const SEPARATOR: InputKey = InputKey::new(0x6C);
|
||||||
|
pub const SUBTRACT: InputKey = InputKey::new(0x6D);
|
||||||
|
pub const DECIMAL: InputKey = InputKey::new(0x6E);
|
||||||
|
pub const DIVIDE: InputKey = InputKey::new(0x6F);
|
||||||
|
|
||||||
|
pub const F1: InputKey = InputKey::new(0x70);
|
||||||
|
pub const F2: InputKey = InputKey::new(0x71);
|
||||||
|
pub const F3: InputKey = InputKey::new(0x72);
|
||||||
|
pub const F4: InputKey = InputKey::new(0x73);
|
||||||
|
pub const F5: InputKey = InputKey::new(0x74);
|
||||||
|
pub const F6: InputKey = InputKey::new(0x75);
|
||||||
|
pub const F7: InputKey = InputKey::new(0x76);
|
||||||
|
pub const F8: InputKey = InputKey::new(0x77);
|
||||||
|
pub const F9: InputKey = InputKey::new(0x78);
|
||||||
|
pub const F10: InputKey = InputKey::new(0x79);
|
||||||
|
pub const F11: InputKey = InputKey::new(0x7A);
|
||||||
|
pub const F12: InputKey = InputKey::new(0x7B);
|
||||||
|
pub const F13: InputKey = InputKey::new(0x7C);
|
||||||
|
pub const F14: InputKey = InputKey::new(0x7D);
|
||||||
|
pub const F15: InputKey = InputKey::new(0x7E);
|
||||||
|
pub const F16: InputKey = InputKey::new(0x7F);
|
||||||
|
pub const F17: InputKey = InputKey::new(0x80);
|
||||||
|
pub const F18: InputKey = InputKey::new(0x81);
|
||||||
|
pub const F19: InputKey = InputKey::new(0x82);
|
||||||
|
pub const F20: InputKey = InputKey::new(0x83);
|
||||||
|
pub const F21: InputKey = InputKey::new(0x84);
|
||||||
|
pub const F22: InputKey = InputKey::new(0x85);
|
||||||
|
pub const F23: InputKey = InputKey::new(0x86);
|
||||||
|
pub const F24: InputKey = InputKey::new(0x87);
|
||||||
|
}
|
||||||
|
|
||||||
|
pub mod kbmod {
|
||||||
|
use super::InputKeyMod;
|
||||||
|
|
||||||
|
pub const NONE: InputKeyMod = InputKeyMod::new(0x00000000);
|
||||||
|
pub const CTRL: InputKeyMod = InputKeyMod::new(0x01000000);
|
||||||
|
pub const ALT: InputKeyMod = InputKeyMod::new(0x02000000);
|
||||||
|
pub const SHIFT: InputKeyMod = InputKeyMod::new(0x04000000);
|
||||||
|
|
||||||
|
pub const CTRL_ALT: InputKeyMod = InputKeyMod::new(0x03000000);
|
||||||
|
pub const CTRL_SHIFT: InputKeyMod = InputKeyMod::new(0x05000000);
|
||||||
|
pub const ALT_SHIFT: InputKeyMod = InputKeyMod::new(0x06000000);
|
||||||
|
pub const CTRL_ALT_SHIFT: InputKeyMod = InputKeyMod::new(0x07000000);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A run of text input borrowed from the underlying input buffer.
#[derive(Clone, Copy)]
pub struct InputText<'a> {
    /// The text itself.
    pub text: &'a str,
    /// NOTE(review): presumably marks text received via bracketed paste; the
    /// parser in this file always sets it to `false` — confirm with callers.
    pub bracketed: bool,
}
|
||||||
|
|
||||||
|
/// The button/action state reported by a mouse event.
///
/// The variant order matters: `PartialOrd`/`Ord` are derived from it.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Default)]
pub enum InputMouseState {
    /// No button is pressed.
    #[default]
    None,

    // Button states: these three persist from one frame to the next.
    Left,
    Middle,
    Right,

    // One-shot states: these two are reset to `None` on the next frame.
    Release,
    Scroll,
}
|
||||||
|
|
||||||
|
#[derive(Clone, Copy)]
|
||||||
|
pub struct InputMouse {
|
||||||
|
pub state: InputMouseState,
|
||||||
|
pub modifiers: InputKeyMod,
|
||||||
|
pub position: Point,
|
||||||
|
pub scroll: Point,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub enum Input<'input> {
|
||||||
|
Resize(Size),
|
||||||
|
Text(InputText<'input>),
|
||||||
|
Keyboard(InputKey),
|
||||||
|
Mouse(InputMouse),
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct Parser {
|
||||||
|
want: bool,
|
||||||
|
buf: [u8; 3],
|
||||||
|
len: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Parser {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self {
|
||||||
|
want: false,
|
||||||
|
buf: [0; 3],
|
||||||
|
len: 0,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Turns VT sequences into keyboard, mouse, etc., inputs.
|
||||||
|
pub fn parse<'parser, 'vt, 'input>(
|
||||||
|
&'parser mut self,
|
||||||
|
stream: vt::Stream<'vt, 'input>,
|
||||||
|
) -> Stream<'parser, 'vt, 'input> {
|
||||||
|
Stream {
|
||||||
|
parser: self,
|
||||||
|
stream,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct Stream<'parser, 'vt, 'input> {
|
||||||
|
parser: &'parser mut Parser,
|
||||||
|
stream: vt::Stream<'vt, 'input>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Stream<'_, '_, '_> {
|
||||||
|
/// Parses the next input action from the previously given input.
|
||||||
|
///
|
||||||
|
/// Can't implement Iterator, because this is a "lending iterator".
|
||||||
|
pub fn next(&mut self) -> Option<Input> {
|
||||||
|
if self.parser.want {
|
||||||
|
return self.parse_x10_mouse_coordinates();
|
||||||
|
}
|
||||||
|
|
||||||
|
let token = self.stream.next()?;
|
||||||
|
|
||||||
|
match token {
|
||||||
|
vt::Token::Text(text) => Some(Input::Text(InputText {
|
||||||
|
text,
|
||||||
|
bracketed: false,
|
||||||
|
})),
|
||||||
|
vt::Token::Ctrl(ch) => match ch {
|
||||||
|
'\0' | '\t' | '\r' => Some(Input::Keyboard(InputKey::new(ch as u32))),
|
||||||
|
..='\x1a' => {
|
||||||
|
// Shift control code to A-Z
|
||||||
|
let key = ch as u32 | 0x40;
|
||||||
|
Some(Input::Keyboard(kbmod::CTRL | InputKey::new(key)))
|
||||||
|
}
|
||||||
|
'\x7f' => Some(Input::Keyboard(vk::BACK)),
|
||||||
|
_ => None,
|
||||||
|
},
|
||||||
|
vt::Token::Esc(ch) => {
|
||||||
|
match ch {
|
||||||
|
'\0' => Some(Input::Keyboard(vk::ESCAPE)),
|
||||||
|
' '..='~' => {
|
||||||
|
let ch = ch as u32;
|
||||||
|
let key = ch & !0x20; // Shift a-z to A-Z
|
||||||
|
let modifiers = if (ch & 0x20) != 0 {
|
||||||
|
kbmod::ALT
|
||||||
|
} else {
|
||||||
|
kbmod::ALT_SHIFT
|
||||||
|
};
|
||||||
|
Some(Input::Keyboard(modifiers | InputKey::new(key)))
|
||||||
|
}
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
vt::Token::SS3(ch) => {
|
||||||
|
if ('P'..='S').contains(&ch) {
|
||||||
|
let key = vk::F1.value() + ch as u32 - 'P' as u32;
|
||||||
|
Some(Input::Keyboard(InputKey::new(key)))
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
vt::Token::Csi(csi) => {
|
||||||
|
match csi.final_byte {
|
||||||
|
'A'..='H' => {
|
||||||
|
const LUT: [u8; 8] = [
|
||||||
|
vk::UP.value() as u8, // A
|
||||||
|
vk::DOWN.value() as u8, // B
|
||||||
|
vk::RIGHT.value() as u8, // C
|
||||||
|
vk::LEFT.value() as u8, // D
|
||||||
|
0, // E
|
||||||
|
vk::END.value() as u8, // F
|
||||||
|
0, // G
|
||||||
|
vk::HOME.value() as u8, // H
|
||||||
|
];
|
||||||
|
let vk = LUT[csi.final_byte as usize - 'A' as usize];
|
||||||
|
if vk != 0 {
|
||||||
|
return Some(Input::Keyboard(
|
||||||
|
InputKey::new(vk as u32) | Self::parse_modifiers(csi),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
None
|
||||||
|
}
|
||||||
|
'Z' => return Some(Input::Keyboard(kbmod::SHIFT | vk::TAB)),
|
||||||
|
'~' => {
|
||||||
|
const LUT: [u8; 35] = [
|
||||||
|
0,
|
||||||
|
vk::HOME.value() as u8, // 1
|
||||||
|
vk::INSERT.value() as u8, // 2
|
||||||
|
vk::DELETE.value() as u8, // 3
|
||||||
|
vk::END.value() as u8, // 4
|
||||||
|
vk::PRIOR.value() as u8, // 5
|
||||||
|
vk::NEXT.value() as u8, // 6
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
vk::F5.value() as u8, // 15
|
||||||
|
0,
|
||||||
|
vk::F6.value() as u8, // 17
|
||||||
|
vk::F7.value() as u8, // 18
|
||||||
|
vk::F8.value() as u8, // 19
|
||||||
|
vk::F9.value() as u8, // 20
|
||||||
|
vk::F10.value() as u8, // 21
|
||||||
|
0,
|
||||||
|
vk::F11.value() as u8, // 23
|
||||||
|
vk::F12.value() as u8, // 24
|
||||||
|
vk::F13.value() as u8, // 25
|
||||||
|
vk::F14.value() as u8, // 26
|
||||||
|
0,
|
||||||
|
vk::F15.value() as u8, // 28
|
||||||
|
vk::F16.value() as u8, // 29
|
||||||
|
0,
|
||||||
|
vk::F17.value() as u8, // 31
|
||||||
|
vk::F18.value() as u8, // 32
|
||||||
|
vk::F19.value() as u8, // 33
|
||||||
|
vk::F20.value() as u8, // 34
|
||||||
|
];
|
||||||
|
let p0 = csi.params[0];
|
||||||
|
if p0 >= 0 && p0 <= LUT.len() as i32 {
|
||||||
|
let vk = LUT[p0 as usize];
|
||||||
|
if vk != 0 {
|
||||||
|
return Some(Input::Keyboard(
|
||||||
|
InputKey::new(vk as u32) | Self::parse_modifiers(csi),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None
|
||||||
|
}
|
||||||
|
'm' | 'M' if csi.private_byte == '<' => {
|
||||||
|
let btn = csi.params[0];
|
||||||
|
let mut mouse = InputMouse {
|
||||||
|
state: InputMouseState::None,
|
||||||
|
modifiers: kbmod::NONE,
|
||||||
|
position: Point::default(),
|
||||||
|
scroll: Point::default(),
|
||||||
|
};
|
||||||
|
|
||||||
|
mouse.state = InputMouseState::None;
|
||||||
|
if (btn & 0x40) != 0 {
|
||||||
|
mouse.state = InputMouseState::Scroll;
|
||||||
|
mouse.scroll.y += if (btn & 0x01) != 0 { 3 } else { -3 };
|
||||||
|
} else if csi.final_byte == 'M' {
|
||||||
|
const STATES: [InputMouseState; 4] = [
|
||||||
|
InputMouseState::Left,
|
||||||
|
InputMouseState::Middle,
|
||||||
|
InputMouseState::Right,
|
||||||
|
InputMouseState::None,
|
||||||
|
];
|
||||||
|
mouse.state = STATES[(btn as usize) & 0x03];
|
||||||
|
}
|
||||||
|
|
||||||
|
mouse.modifiers = kbmod::NONE;
|
||||||
|
mouse.modifiers |= if (btn & 0x04) != 0 {
|
||||||
|
kbmod::SHIFT
|
||||||
|
} else {
|
||||||
|
kbmod::NONE
|
||||||
|
};
|
||||||
|
mouse.modifiers |= if (btn & 0x08) != 0 {
|
||||||
|
kbmod::ALT
|
||||||
|
} else {
|
||||||
|
kbmod::NONE
|
||||||
|
};
|
||||||
|
mouse.modifiers |= if (btn & 0x10f) != 0 {
|
||||||
|
kbmod::CTRL
|
||||||
|
} else {
|
||||||
|
kbmod::NONE
|
||||||
|
};
|
||||||
|
|
||||||
|
mouse.position.x = csi.params[1] - 1;
|
||||||
|
mouse.position.y = csi.params[2] - 1;
|
||||||
|
Some(Input::Mouse(mouse))
|
||||||
|
}
|
||||||
|
'M' if csi.param_count == 0 => {
|
||||||
|
self.parser.want = true;
|
||||||
|
None
|
||||||
|
}
|
||||||
|
't' if csi.params[0] == 8 => {
|
||||||
|
// Window Size
|
||||||
|
let width = csi.params[2].clamp(1, 32767);
|
||||||
|
let height = csi.params[1].clamp(1, 32767);
|
||||||
|
Some(Input::Resize(Size { width, height }))
|
||||||
|
}
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Implements the X10 mouse protocol via `CSI M CbCxCy`.
|
||||||
|
///
|
||||||
|
/// You want to send numeric mouse coordinates.
|
||||||
|
/// You have CSI sequences with numeric parameters.
|
||||||
|
/// So, of course you put the coordinates as shifted ASCII characters after
|
||||||
|
/// the end of the sequence. Limited coordinate range and complicated parsing!
|
||||||
|
/// This is so puzzling to me. The existence of this function makes me unhappy.
|
||||||
|
#[cold]
|
||||||
|
fn parse_x10_mouse_coordinates(&mut self) -> Option<Input> {
|
||||||
|
self.parser.len += self.stream.read(&mut self.parser.buf[self.parser.len..]);
|
||||||
|
if self.parser.len < 3 {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
let button = self.parser.buf[0] & 0b11;
|
||||||
|
let modifier = self.parser.buf[0] & 0b11100;
|
||||||
|
let x = self.parser.buf[1] as i32 - 0x21;
|
||||||
|
let y = self.parser.buf[2] as i32 - 0x21;
|
||||||
|
let action = match button {
|
||||||
|
0 => InputMouseState::Left,
|
||||||
|
1 => InputMouseState::Middle,
|
||||||
|
2 => InputMouseState::Right,
|
||||||
|
_ => InputMouseState::None,
|
||||||
|
};
|
||||||
|
let modifiers = match modifier {
|
||||||
|
4 => kbmod::SHIFT,
|
||||||
|
8 => kbmod::ALT,
|
||||||
|
16 => kbmod::CTRL,
|
||||||
|
_ => kbmod::NONE,
|
||||||
|
};
|
||||||
|
|
||||||
|
self.parser.want = false;
|
||||||
|
self.parser.len = 0;
|
||||||
|
|
||||||
|
Some(Input::Mouse(InputMouse {
|
||||||
|
state: action,
|
||||||
|
modifiers,
|
||||||
|
position: Point { x, y },
|
||||||
|
scroll: Point::default(),
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_modifiers(csi: &vt::Csi) -> InputKeyMod {
|
||||||
|
let mut modifiers = kbmod::NONE;
|
||||||
|
let p1 = (csi.params[1] - 1).max(0);
|
||||||
|
if (p1 & 0x01) != 0 {
|
||||||
|
modifiers |= kbmod::SHIFT;
|
||||||
|
}
|
||||||
|
if (p1 & 0x02) != 0 {
|
||||||
|
modifiers |= kbmod::ALT;
|
||||||
|
}
|
||||||
|
if (p1 & 0x04) != 0 {
|
||||||
|
modifiers |= kbmod::CTRL;
|
||||||
|
}
|
||||||
|
modifiers
|
||||||
|
}
|
||||||
|
}
|
663
src/loc.rs
Normal file
663
src/loc.rs
Normal file
|
@ -0,0 +1,663 @@
|
||||||
|
use crate::sys;
|
||||||
|
|
||||||
|
/// Identifies a localizable string.
///
/// The discriminants are used as row indices into the translation table, so
/// the variant order must match the order of the table's rows, and `Count`
/// must stay last.
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum LocId {
    // Modifier key names
    Ctrl,
    Alt,
    Shift,

    // File menu
    File,
    FileSave,
    FileSaveAs,
    FileExit,

    // Edit menu
    Edit,
    EditUndo,
    EditRedo,
    EditCut,
    EditCopy,
    EditPaste,
    EditFind,
    EditReplace,

    // View menu
    View,
    ViewWordWrap,

    // Help menu
    Help,
    HelpAbout,

    // Exit dialog
    UnsavedChangesDialogTitle,
    UnsavedChangesDialogDescription,
    UnsavedChangesDialogYes,
    UnsavedChangesDialogNo,
    UnsavedChangesDialogCancel,

    // About dialog
    AboutDialogTitle,
    AboutDialogDescription,
    AboutDialogVersion,

    // Search bar
    SearchLabel,
    SearchClose,
    SearchMatchCase,
    SearchWholeWord,
    SearchUseRegex,

    // Encoding picker
    EncodingReopen,
    EncodingConvert,

    // Indentation picker
    IndentationTabs,
    IndentationSpaces,

    // Save As dialog
    SaveAsDialogTitle,
    SaveAsDialogFilenameLabel,

    /// Number of string IDs; not a real string.
    Count,
}
|
||||||
|
|
||||||
|
/// Identifies a supported UI language.
///
/// The discriminants are used as column indices into the translation table,
/// so the variant order must match the order of the entries within each row,
/// and `Count` must stay last.
#[allow(non_camel_case_types)]
#[derive(Clone, Copy, PartialEq, Eq)]
enum LangId {
    // Base language. It's always the first one.
    en,

    // All remaining languages, in alphabetical order.
    de,
    es,
    fr,
    it,
    ja,
    ko,
    pt_br,
    ru,
    zh_hans,
    zh_hant,

    /// Number of languages; not a real language.
    Count,
}
|
||||||
|
|
||||||
|
#[rustfmt::skip]
|
||||||
|
const S_LANG_LUT: [[&str; LangId::Count as usize]; LocId::Count as usize] = [
|
||||||
|
// Ctrl
|
||||||
|
[
|
||||||
|
/* en */ "Ctrl",
|
||||||
|
/* de */ "Strg",
|
||||||
|
/* es */ "Ctrl",
|
||||||
|
/* fr */ "Ctrl",
|
||||||
|
/* it */ "Ctrl",
|
||||||
|
/* ja */ "Ctrl",
|
||||||
|
/* ko */ "Ctrl",
|
||||||
|
/* pt_br */ "Ctrl",
|
||||||
|
/* ru */ "Ctrl",
|
||||||
|
/* zh_hans */ "Ctrl",
|
||||||
|
/* zh_hant */ "Ctrl",
|
||||||
|
],
|
||||||
|
// Alt
|
||||||
|
[
|
||||||
|
/* en */ "Alt",
|
||||||
|
/* de */ "Alt",
|
||||||
|
/* es */ "Alt",
|
||||||
|
/* fr */ "Alt",
|
||||||
|
/* it */ "Alt",
|
||||||
|
/* ja */ "Alt",
|
||||||
|
/* ko */ "Alt",
|
||||||
|
/* pt_br */ "Alt",
|
||||||
|
/* ru */ "Alt",
|
||||||
|
/* zh_hans */ "Alt",
|
||||||
|
/* zh_hant */ "Alt",
|
||||||
|
],
|
||||||
|
// Shift
|
||||||
|
[
|
||||||
|
/* en */ "Shift",
|
||||||
|
/* de */ "Umschalt",
|
||||||
|
/* es */ "Mayús",
|
||||||
|
/* fr */ "Maj",
|
||||||
|
/* it */ "Maiusc",
|
||||||
|
/* ja */ "Shift",
|
||||||
|
/* ko */ "Shift",
|
||||||
|
/* pt_br */ "Shift",
|
||||||
|
/* ru */ "Shift",
|
||||||
|
/* zh_hans */ "Shift",
|
||||||
|
/* zh_hant */ "Shift",
|
||||||
|
],
|
||||||
|
|
||||||
|
// File
|
||||||
|
[
|
||||||
|
/* en */ "File",
|
||||||
|
/* de */ "Datei",
|
||||||
|
/* es */ "Archivo",
|
||||||
|
/* fr */ "Fichier",
|
||||||
|
/* it */ "File",
|
||||||
|
/* ja */ "ファイル",
|
||||||
|
/* ko */ "파일",
|
||||||
|
/* pt_br */ "Arquivo",
|
||||||
|
/* ru */ "Файл",
|
||||||
|
/* zh_hans */ "文件",
|
||||||
|
/* zh_hant */ "檔案",
|
||||||
|
],
|
||||||
|
// FileSave
|
||||||
|
[
|
||||||
|
/* en */ "Save",
|
||||||
|
/* de */ "Speichern",
|
||||||
|
/* es */ "Guardar",
|
||||||
|
/* fr */ "Enregistrer",
|
||||||
|
/* it */ "Salva",
|
||||||
|
/* ja */ "保存",
|
||||||
|
/* ko */ "저장",
|
||||||
|
/* pt_br */ "Salvar",
|
||||||
|
/* ru */ "Сохранить",
|
||||||
|
/* zh_hans */ "保存",
|
||||||
|
/* zh_hant */ "儲存",
|
||||||
|
],
|
||||||
|
// FileSaveAs
|
||||||
|
// NOTE: Exact same translation as SaveAsDialogTitle, and both should be kept in sync.
|
||||||
|
[
|
||||||
|
/* en */ "Save As…",
|
||||||
|
/* de */ "Speichern unter…",
|
||||||
|
/* es */ "Guardar como…",
|
||||||
|
/* fr */ "Enregistrer sous…",
|
||||||
|
/* it */ "Salva come…",
|
||||||
|
/* ja */ "名前を付けて保存…",
|
||||||
|
/* ko */ "다른 이름으로 저장…",
|
||||||
|
/* pt_br */ "Salvar como…",
|
||||||
|
/* ru */ "Сохранить как…",
|
||||||
|
/* zh_hans */ "另存为…",
|
||||||
|
/* zh_hant */ "另存新檔…",
|
||||||
|
],
|
||||||
|
// FileExit
|
||||||
|
[
|
||||||
|
/* en */ "Exit",
|
||||||
|
/* de */ "Beenden",
|
||||||
|
/* es */ "Salir",
|
||||||
|
/* fr */ "Quitter",
|
||||||
|
/* it */ "Esci",
|
||||||
|
/* ja */ "終了",
|
||||||
|
/* ko */ "종료",
|
||||||
|
/* pt_br */ "Sair",
|
||||||
|
/* ru */ "Выход",
|
||||||
|
/* zh_hans */ "退出",
|
||||||
|
/* zh_hant */ "退出",
|
||||||
|
],
|
||||||
|
|
||||||
|
// Edit
|
||||||
|
[
|
||||||
|
/* en */ "Edit",
|
||||||
|
/* de */ "Bearbeiten",
|
||||||
|
/* es */ "Editar",
|
||||||
|
/* fr */ "Éditer",
|
||||||
|
/* it */ "Modifica",
|
||||||
|
/* ja */ "編集",
|
||||||
|
/* ko */ "편집",
|
||||||
|
/* pt_br */ "Editar",
|
||||||
|
/* ru */ "Правка",
|
||||||
|
/* zh_hans */ "编辑",
|
||||||
|
/* zh_hant */ "編輯",
|
||||||
|
],
|
||||||
|
// EditUndo
|
||||||
|
[
|
||||||
|
/* en */ "Undo",
|
||||||
|
/* de */ "Rückgängig",
|
||||||
|
/* es */ "Deshacer",
|
||||||
|
/* fr */ "Annuler",
|
||||||
|
/* it */ "Annulla",
|
||||||
|
/* ja */ "元に戻す",
|
||||||
|
/* ko */ "실행 취소",
|
||||||
|
/* pt_br */ "Desfazer",
|
||||||
|
/* ru */ "Отменить",
|
||||||
|
/* zh_hans */ "撤销",
|
||||||
|
/* zh_hant */ "復原",
|
||||||
|
],
|
||||||
|
// EditRedo
|
||||||
|
[
|
||||||
|
/* en */ "Redo",
|
||||||
|
/* de */ "Wiederholen",
|
||||||
|
/* es */ "Rehacer",
|
||||||
|
/* fr */ "Rétablir",
|
||||||
|
/* it */ "Ripeti",
|
||||||
|
/* ja */ "やり直し",
|
||||||
|
/* ko */ "다시 실행",
|
||||||
|
/* pt_br */ "Refazer",
|
||||||
|
/* ru */ "Повторить",
|
||||||
|
/* zh_hans */ "重做",
|
||||||
|
/* zh_hant */ "重做",
|
||||||
|
],
|
||||||
|
// EditCut
|
||||||
|
[
|
||||||
|
/* en */ "Cut",
|
||||||
|
/* de */ "Ausschneiden",
|
||||||
|
/* es */ "Cortar",
|
||||||
|
/* fr */ "Couper",
|
||||||
|
/* it */ "Taglia",
|
||||||
|
/* ja */ "切り取り",
|
||||||
|
/* ko */ "잘라내기",
|
||||||
|
/* pt_br */ "Cortar",
|
||||||
|
/* ru */ "Вырезать",
|
||||||
|
/* zh_hans */ "剪切",
|
||||||
|
/* zh_hant */ "剪下",
|
||||||
|
],
|
||||||
|
// EditCopy
|
||||||
|
[
|
||||||
|
/* en */ "Copy",
|
||||||
|
/* de */ "Kopieren",
|
||||||
|
/* es */ "Copiar",
|
||||||
|
/* fr */ "Copier",
|
||||||
|
/* it */ "Copia",
|
||||||
|
/* ja */ "コピー",
|
||||||
|
/* ko */ "복사",
|
||||||
|
/* pt_br */ "Copiar",
|
||||||
|
/* ru */ "Копировать",
|
||||||
|
/* zh_hans */ "复制",
|
||||||
|
/* zh_hant */ "複製",
|
||||||
|
],
|
||||||
|
// EditPaste
|
||||||
|
[
|
||||||
|
/* en */ "Paste",
|
||||||
|
/* de */ "Einfügen",
|
||||||
|
/* es */ "Pegar",
|
||||||
|
/* fr */ "Coller",
|
||||||
|
/* it */ "Incolla",
|
||||||
|
/* ja */ "貼り付け",
|
||||||
|
/* ko */ "붙여넣기",
|
||||||
|
/* pt_br */ "Colar",
|
||||||
|
/* ru */ "Вставить",
|
||||||
|
/* zh_hans */ "粘贴",
|
||||||
|
/* zh_hant */ "貼上",
|
||||||
|
],
|
||||||
|
// EditFind
|
||||||
|
[
|
||||||
|
/* en */ "Find",
|
||||||
|
/* de */ "Suchen",
|
||||||
|
/* es */ "Buscar",
|
||||||
|
/* fr */ "Rechercher",
|
||||||
|
/* it */ "Trova",
|
||||||
|
/* ja */ "検索",
|
||||||
|
/* ko */ "찾기",
|
||||||
|
/* pt_br */ "Encontrar",
|
||||||
|
/* ru */ "Найти",
|
||||||
|
/* zh_hans */ "查找",
|
||||||
|
/* zh_hant */ "尋找",
|
||||||
|
],
|
||||||
|
// EditReplace
|
||||||
|
[
|
||||||
|
/* en */ "Replace",
|
||||||
|
/* de */ "Ersetzen",
|
||||||
|
/* es */ "Reemplazar",
|
||||||
|
/* fr */ "Remplacer",
|
||||||
|
/* it */ "Sostituisci",
|
||||||
|
/* ja */ "置換",
|
||||||
|
/* ko */ "바꾸기",
|
||||||
|
/* pt_br */ "Substituir",
|
||||||
|
/* ru */ "Заменить",
|
||||||
|
/* zh_hans */ "替换",
|
||||||
|
/* zh_hant */ "取代",
|
||||||
|
],
|
||||||
|
|
||||||
|
// View
|
||||||
|
[
|
||||||
|
/* en */ "View",
|
||||||
|
/* de */ "Ansicht",
|
||||||
|
/* es */ "Ver",
|
||||||
|
/* fr */ "Affichage",
|
||||||
|
/* it */ "Visualizza",
|
||||||
|
/* ja */ "表示",
|
||||||
|
/* ko */ "보기",
|
||||||
|
/* pt_br */ "Exibir",
|
||||||
|
/* ru */ "Вид",
|
||||||
|
/* zh_hans */ "视图",
|
||||||
|
/* zh_hant */ "檢視",
|
||||||
|
],
|
||||||
|
// ViewWordWrap
|
||||||
|
[
|
||||||
|
/* en */ "Word Wrap",
|
||||||
|
/* de */ "Zeilenumbruch",
|
||||||
|
/* es */ "Ajuste de línea",
|
||||||
|
/* fr */ "Retour à la ligne",
|
||||||
|
/* it */ "A capo automatico",
|
||||||
|
/* ja */ "折り返し",
|
||||||
|
/* ko */ "자동 줄 바꿈",
|
||||||
|
/* pt_br */ "Quebra de linha",
|
||||||
|
/* ru */ "Перенос слов",
|
||||||
|
/* zh_hans */ "自动换行",
|
||||||
|
/* zh_hant */ "自動換行",
|
||||||
|
],
|
||||||
|
|
||||||
|
// Help
|
||||||
|
[
|
||||||
|
/* en */ "Help",
|
||||||
|
/* de */ "Hilfe",
|
||||||
|
/* es */ "Ayuda",
|
||||||
|
/* fr */ "Aide",
|
||||||
|
/* it */ "Aiuto",
|
||||||
|
/* ja */ "ヘルプ",
|
||||||
|
/* ko */ "도움말",
|
||||||
|
/* pt_br */ "Ajuda",
|
||||||
|
/* ru */ "Помощь",
|
||||||
|
/* zh_hans */ "帮助",
|
||||||
|
/* zh_hant */ "幫助",
|
||||||
|
],
|
||||||
|
// HelpAbout
|
||||||
|
[
|
||||||
|
/* en */ "About",
|
||||||
|
/* de */ "Über",
|
||||||
|
/* es */ "Acerca de",
|
||||||
|
/* fr */ "À propos",
|
||||||
|
/* it */ "Informazioni",
|
||||||
|
/* ja */ "情報",
|
||||||
|
/* ko */ "정보",
|
||||||
|
/* pt_br */ "Sobre",
|
||||||
|
/* ru */ "О программе",
|
||||||
|
/* zh_hans */ "关于",
|
||||||
|
/* zh_hant */ "關於",
|
||||||
|
],
|
||||||
|
|
||||||
|
// UnsavedChangesDialogTitle
|
||||||
|
[
|
||||||
|
/* en */ "Unsaved Changes",
|
||||||
|
/* de */ "Ungespeicherte Änderungen",
|
||||||
|
/* es */ "Cambios sin guardar",
|
||||||
|
/* fr */ "Modifications non enregistrées",
|
||||||
|
/* it */ "Modifiche non salvate",
|
||||||
|
/* ja */ "未保存の変更",
|
||||||
|
/* ko */ "저장되지 않은 변경 사항",
|
||||||
|
/* pt_br */ "Alterações não salvas",
|
||||||
|
/* ru */ "Несохраненные изменения",
|
||||||
|
/* zh_hans */ "未保存的更改",
|
||||||
|
/* zh_hant */ "未儲存的變更",
|
||||||
|
],
|
||||||
|
// UnsavedChangesDialogDescription
|
||||||
|
[
|
||||||
|
/* en */ "Do you want to save the changes you made?",
|
||||||
|
/* de */ "Möchten Sie die vorgenommenen Änderungen speichern?",
|
||||||
|
/* es */ "¿Desea guardar los cambios realizados?",
|
||||||
|
/* fr */ "Voulez-vous enregistrer les modifications apportées?",
|
||||||
|
/* it */ "Vuoi salvare le modifiche apportate?",
|
||||||
|
/* ja */ "変更内容を保存しますか?",
|
||||||
|
/* ko */ "변경한 내용을 저장하시겠습니까?",
|
||||||
|
/* pt_br */ "Deseja salvar as alterações feitas?",
|
||||||
|
/* ru */ "Вы хотите сохранить внесённые изменения?",
|
||||||
|
/* zh_hans */ "您要保存所做的更改吗?",
|
||||||
|
/* zh_hant */ "您要保存所做的變更嗎?",
|
||||||
|
],
|
||||||
|
// UnsavedChangesDialogYes
|
||||||
|
[
|
||||||
|
/* en */ "Save",
|
||||||
|
/* de */ "Speichern",
|
||||||
|
/* es */ "Guardar",
|
||||||
|
/* fr */ "Enregistrer",
|
||||||
|
/* it */ "Salva",
|
||||||
|
/* ja */ "保存",
|
||||||
|
/* ko */ "저장",
|
||||||
|
/* pt_br */ "Salvar",
|
||||||
|
/* ru */ "Сохранить",
|
||||||
|
/* zh_hans */ "保存",
|
||||||
|
/* zh_hant */ "儲存",
|
||||||
|
],
|
||||||
|
// UnsavedChangesDialogNo
|
||||||
|
[
|
||||||
|
/* en */ "Don't Save",
|
||||||
|
/* de */ "Nicht speichern",
|
||||||
|
/* es */ "No guardar",
|
||||||
|
/* fr */ "Ne pas enregistrer",
|
||||||
|
/* it */ "Non salvare",
|
||||||
|
/* ja */ "保存しない",
|
||||||
|
/* ko */ "저장 안 함",
|
||||||
|
/* pt_br */ "Não salvar",
|
||||||
|
/* ru */ "Не сохранять",
|
||||||
|
/* zh_hans */ "不保存",
|
||||||
|
/* zh_hant */ "不儲存",
|
||||||
|
],
|
||||||
|
// UnsavedChangesDialogCancel
|
||||||
|
[
|
||||||
|
/* en */ "Cancel",
|
||||||
|
/* de */ "Abbrechen",
|
||||||
|
/* es */ "Cancelar",
|
||||||
|
/* fr */ "Annuler",
|
||||||
|
/* it */ "Annulla",
|
||||||
|
/* ja */ "キャンセル",
|
||||||
|
/* ko */ "취소",
|
||||||
|
/* pt_br */ "Cancelar",
|
||||||
|
/* ru */ "Отмена",
|
||||||
|
/* zh_hans */ "取消",
|
||||||
|
/* zh_hant */ "取消",
|
||||||
|
],
|
||||||
|
|
||||||
|
// AboutDialogTitle
|
||||||
|
[
|
||||||
|
/* en */ "About",
|
||||||
|
/* de */ "Über",
|
||||||
|
/* es */ "Acerca de",
|
||||||
|
/* fr */ "À propos",
|
||||||
|
/* it */ "Informazioni",
|
||||||
|
/* ja */ "情報",
|
||||||
|
/* ko */ "정보",
|
||||||
|
/* pt_br */ "Sobre",
|
||||||
|
/* ru */ "О программе",
|
||||||
|
/* zh_hans */ "关于",
|
||||||
|
/* zh_hant */ "關於",
|
||||||
|
],
|
||||||
|
// AboutDialogDescription
|
||||||
|
[
|
||||||
|
/* en */ "Grug's favorite editor",
|
||||||
|
/* de */ "Grugs Lieblingseditor",
|
||||||
|
/* es */ "El editor favorito de Grug",
|
||||||
|
/* fr */ "L'éditeur préféré de Grug",
|
||||||
|
/* it */ "L'editor preferito di Grug",
|
||||||
|
/* ja */ "Grugのお気に入りエディタ",
|
||||||
|
/* ko */ "Grug이 가장 좋아하는 편집기",
|
||||||
|
/* pt_br */ "O editor favorito do Grug",
|
||||||
|
/* ru */ "Любимый редактор Груга",
|
||||||
|
/* zh_hans */ "Grug最喜欢的编辑器",
|
||||||
|
/* zh_hant */ "Grug最喜歡的編輯器",
|
||||||
|
],
|
||||||
|
// AboutDialogVersion
|
||||||
|
[
|
||||||
|
/* en */ "Version: ",
|
||||||
|
/* de */ "Version: ",
|
||||||
|
/* es */ "Versión: ",
|
||||||
|
/* fr */ "Version : ",
|
||||||
|
/* it */ "Versione: ",
|
||||||
|
/* ja */ "バージョン: ",
|
||||||
|
/* ko */ "버전: ",
|
||||||
|
/* pt_br */ "Versão: ",
|
||||||
|
/* ru */ "Версия: ",
|
||||||
|
/* zh_hans */ "版本:",
|
||||||
|
/* zh_hant */ "版本:",
|
||||||
|
],
|
||||||
|
|
||||||
|
// SearchLabel
|
||||||
|
[
|
||||||
|
/* en */ "Find:",
|
||||||
|
/* de */ "Suchen:",
|
||||||
|
/* es */ "Buscar:",
|
||||||
|
/* fr */ "Rechercher:",
|
||||||
|
/* it */ "Trova:",
|
||||||
|
/* ja */ "検索:",
|
||||||
|
/* ko */ "찾기:",
|
||||||
|
/* pt_br */ "Encontrar:",
|
||||||
|
/* ru */ "Найти:",
|
||||||
|
/* zh_hans */ "查找:",
|
||||||
|
/* zh_hant */ "尋找:",
|
||||||
|
],
|
||||||
|
// SearchClose
|
||||||
|
[
|
||||||
|
/* en */ "Close",
|
||||||
|
/* de */ "Schließen",
|
||||||
|
/* es */ "Cerrar",
|
||||||
|
/* fr */ "Fermer",
|
||||||
|
/* it */ "Chiudi",
|
||||||
|
/* ja */ "閉じる",
|
||||||
|
/* ko */ "닫기",
|
||||||
|
/* pt_br */ "Fechar",
|
||||||
|
/* ru */ "Закрыть",
|
||||||
|
/* zh_hans */ "关闭",
|
||||||
|
/* zh_hant */ "關閉",
|
||||||
|
],
|
||||||
|
// SearchMatchCase
|
||||||
|
[
|
||||||
|
/* en */ "Match Case",
|
||||||
|
/* de */ "Groß/Klein",
|
||||||
|
/* es */ "May/Min",
|
||||||
|
/* fr */ "Casse",
|
||||||
|
/* it */ "Maius/minus",
|
||||||
|
/* ja */ "大/小文字",
|
||||||
|
/* ko */ "대소문자",
|
||||||
|
/* pt_br */ "Maius/minus",
|
||||||
|
/* ru */ "Регистр",
|
||||||
|
/* zh_hans */ "区分大小写",
|
||||||
|
/* zh_hant */ "區分大小寫",
|
||||||
|
],
|
||||||
|
// SearchWholeWord
|
||||||
|
[
|
||||||
|
/* en */ "Whole Word",
|
||||||
|
/* de */ "Ganzes Wort",
|
||||||
|
/* es */ "Palabra",
|
||||||
|
/* fr */ "Mot entier",
|
||||||
|
/* it */ "Parola",
|
||||||
|
/* ja */ "単語単位",
|
||||||
|
/* ko */ "전체 단어",
|
||||||
|
/* pt_br */ "Palavra",
|
||||||
|
/* ru */ "Слово",
|
||||||
|
/* zh_hans */ "全字匹配",
|
||||||
|
/* zh_hant */ "全字匹配",
|
||||||
|
],
|
||||||
|
// SearchUseRegex
|
||||||
|
[
|
||||||
|
/* en */ "Use Regex",
|
||||||
|
/* de */ "RegEx",
|
||||||
|
/* es */ "RegEx",
|
||||||
|
/* fr */ "RegEx",
|
||||||
|
/* it */ "RegEx",
|
||||||
|
/* ja */ "正規表現",
|
||||||
|
/* ko */ "정규식",
|
||||||
|
/* pt_br */ "RegEx",
|
||||||
|
/* ru */ "RegEx",
|
||||||
|
/* zh_hans */ "正则",
|
||||||
|
/* zh_hant */ "正則",
|
||||||
|
],
|
||||||
|
|
||||||
|
// EncodingReopen
|
||||||
|
[
|
||||||
|
/* en */ "Reopen with encoding",
|
||||||
|
/* de */ "Mit Kodierung erneut öffnen",
|
||||||
|
/* es */ "Reabrir con codificación",
|
||||||
|
/* fr */ "Rouvrir avec un encodage différent",
|
||||||
|
/* it */ "Riapri con codifica",
|
||||||
|
/* ja */ "エンコーディングで再度開く",
|
||||||
|
/* ko */ "인코딩으로 다시 열기",
|
||||||
|
/* pt_br */ "Reabrir com codificação",
|
||||||
|
/* ru */ "Открыть снова с кодировкой",
|
||||||
|
/* zh_hans */ "使用编码重新打开",
|
||||||
|
/* zh_hant */ "使用編碼重新打開",
|
||||||
|
],
|
||||||
|
// EncodingConvert
|
||||||
|
[
|
||||||
|
/* en */ "Convert to encoding",
|
||||||
|
/* de */ "In Kodierung konvertieren",
|
||||||
|
/* es */ "Convertir a otra codificación",
|
||||||
|
/* fr */ "Convertir en encodage",
|
||||||
|
/* it */ "Converti in codifica",
|
||||||
|
/* ja */ "エンコーディングに変換",
|
||||||
|
/* ko */ "인코딩으로 변환",
|
||||||
|
/* pt_br */ "Converter para codificação",
|
||||||
|
/* ru */ "Преобразовать в кодировку",
|
||||||
|
/* zh_hans */ "转换为编码",
|
||||||
|
/* zh_hant */ "轉換為編碼",
|
||||||
|
],
|
||||||
|
|
||||||
|
// IndentationTabs
|
||||||
|
[
|
||||||
|
/* en */ "Tabs",
|
||||||
|
/* de */ "Tabs",
|
||||||
|
/* es */ "Tabulaciones",
|
||||||
|
/* fr */ "Tabulations",
|
||||||
|
/* it */ "Tabulazioni",
|
||||||
|
/* ja */ "タブ",
|
||||||
|
/* ko */ "탭",
|
||||||
|
/* pt_br */ "Tabulações",
|
||||||
|
/* ru */ "Табы",
|
||||||
|
/* zh_hans */ "制表符",
|
||||||
|
/* zh_hant */ "製表符",
|
||||||
|
],
|
||||||
|
// IndentationSpaces
|
||||||
|
[
|
||||||
|
/* en */ "Spaces",
|
||||||
|
/* de */ "Leerzeichen",
|
||||||
|
/* es */ "Espacios",
|
||||||
|
/* fr */ "Espaces",
|
||||||
|
/* it */ "Spazi",
|
||||||
|
/* ja */ "スペース",
|
||||||
|
/* ko */ "공백",
|
||||||
|
/* pt_br */ "Espaços",
|
||||||
|
/* ru */ "Пробелы",
|
||||||
|
/* zh_hans */ "空格",
|
||||||
|
/* zh_hant */ "空格",
|
||||||
|
],
|
||||||
|
|
||||||
|
// SaveAsDialogTitle
|
||||||
|
// NOTE: Exact same translation as FileSaveAs, and both should be kept in sync.
|
||||||
|
[
|
||||||
|
/* en */ "Save As…",
|
||||||
|
/* de */ "Speichern unter…",
|
||||||
|
/* es */ "Guardar como…",
|
||||||
|
/* fr */ "Enregistrer sous…",
|
||||||
|
/* it */ "Salva come…",
|
||||||
|
/* ja */ "名前を付けて保存…",
|
||||||
|
/* ko */ "다른 이름으로 저장…",
|
||||||
|
/* pt_br */ "Salvar como…",
|
||||||
|
/* ru */ "Сохранить как…",
|
||||||
|
/* zh_hans */ "另存为…",
|
||||||
|
/* zh_hant */ "另存新檔…",
|
||||||
|
],
|
||||||
|
// SaveAsDialogFilenameLabel
|
||||||
|
[
|
||||||
|
/* en */ "Filename:",
|
||||||
|
/* de */ "Dateiname:",
|
||||||
|
/* es */ "Nombre de archivo:",
|
||||||
|
/* fr */ "Nom de fichier :",
|
||||||
|
/* it */ "Nome del file:",
|
||||||
|
/* ja */ "ファイル名:",
|
||||||
|
/* ko */ "파일 이름:",
|
||||||
|
/* pt_br */ "Nome do arquivo:",
|
||||||
|
/* ru */ "Имя файла:",
|
||||||
|
/* zh_hans */ "文件名:",
|
||||||
|
/* zh_hant */ "檔案名稱:",
|
||||||
|
],
|
||||||
|
];
|
||||||
|
|
||||||
|
// Language chosen by `init()`; `loc()` uses it as the column index into `S_LANG_LUT`.
// Starts out as English until `init()` has run.
static mut S_LANG: LangId = LangId::en;
|
||||||
|
|
||||||
|
pub fn init() {
|
||||||
|
let langs = sys::preferred_languages();
|
||||||
|
let mut lang = LangId::en;
|
||||||
|
|
||||||
|
for l in langs {
|
||||||
|
lang = match l.as_str() {
|
||||||
|
"en" => LangId::en,
|
||||||
|
"de" => LangId::de,
|
||||||
|
"es" => LangId::es,
|
||||||
|
"fr" => LangId::fr,
|
||||||
|
"it" => LangId::it,
|
||||||
|
"ja" => LangId::ja,
|
||||||
|
"ko" => LangId::ko,
|
||||||
|
"pt-br" => LangId::pt_br,
|
||||||
|
"ru" => LangId::ru,
|
||||||
|
"zh-hant" => LangId::zh_hant,
|
||||||
|
"zh" => LangId::zh_hans,
|
||||||
|
_ => continue,
|
||||||
|
};
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsafe {
|
||||||
|
S_LANG = lang;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn loc(id: LocId) -> &'static str {
|
||||||
|
S_LANG_LUT[id as usize][unsafe { S_LANG as usize }]
|
||||||
|
}
|
1067
src/main.rs
Normal file
1067
src/main.rs
Normal file
File diff suppressed because it is too large
Load diff
491
src/memchr.rs
Normal file
491
src/memchr.rs
Normal file
|
@ -0,0 +1,491 @@
|
||||||
|
//! Rust has a very popular `memchr` crate. It's quite fast, so you may ask yourself
|
||||||
|
//! why we don't just use it: Simply put, this is optimized for short inputs.
|
||||||
|
|
||||||
|
use std::ptr::null;
|
||||||
|
|
||||||
|
/// memchr(), but with two needles.
|
||||||
|
/// Returns the index of the first occurrence of either needle in the `haystack`.
|
||||||
|
/// If no needle is found, `haystack.len()` is returned.
|
||||||
|
/// `offset` specifies the index to start searching from.
|
||||||
|
pub fn memchr2(needle1: u8, needle2: u8, haystack: &[u8], offset: usize) -> usize {
|
||||||
|
unsafe {
|
||||||
|
let beg = haystack.as_ptr();
|
||||||
|
let end = beg.add(haystack.len());
|
||||||
|
let it = beg.add(offset.min(haystack.len()));
|
||||||
|
let it = memchr2_raw(needle1, needle2, it, end);
|
||||||
|
distance(it, beg)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// In order to make `memchr2_raw` slim and fast, we use a function pointer that updates
|
||||||
|
// itself to the correct implementation on the first call. This reduces binary size.
|
||||||
|
// It would also reduce branches if we had >2 implementations (a jump still needs to be predicted).
|
||||||
|
// NOTE that this ONLY works if Control Flow Guard is disabled on Windows.
|
||||||
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||||
|
static mut MEMCHR2_DISPATCH: unsafe fn(
|
||||||
|
needle1: u8,
|
||||||
|
needle2: u8,
|
||||||
|
beg: *const u8,
|
||||||
|
end: *const u8,
|
||||||
|
) -> *const u8 = memchr2_dispatch;
|
||||||
|
|
||||||
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||||
|
unsafe fn memchr2_dispatch(needle1: u8, needle2: u8, beg: *const u8, end: *const u8) -> *const u8 {
|
||||||
|
let func = if is_x86_feature_detected!("avx2") {
|
||||||
|
memchr2_avx2
|
||||||
|
} else {
|
||||||
|
memchr2_fallback
|
||||||
|
};
|
||||||
|
unsafe { MEMCHR2_DISPATCH = func };
|
||||||
|
unsafe { func(needle1, needle2, beg, end) }
|
||||||
|
}
|
||||||
|
|
||||||
|
unsafe fn memchr2_raw(needle1: u8, needle2: u8, beg: *const u8, end: *const u8) -> *const u8 {
|
||||||
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||||
|
return unsafe { MEMCHR2_DISPATCH(needle1, needle2, beg, end) };
|
||||||
|
|
||||||
|
#[cfg(target_arch = "aarch64")]
|
||||||
|
return unsafe { memchr2_neon(needle1, needle2, beg, end) };
|
||||||
|
|
||||||
|
#[allow(unreachable_code)]
|
||||||
|
return unsafe { memchr2_fallback(needle1, needle2, beg, end) };
|
||||||
|
}
|
||||||
|
|
||||||
|
// Scalar implementation: walks `[beg, end)` byte by byte and returns a pointer to the
// first byte equal to either needle, or `end` if there is none.
unsafe fn memchr2_fallback(
    needle1: u8,
    needle2: u8,
    mut beg: *const u8,
    end: *const u8,
) -> *const u8 {
    unsafe {
        loop {
            if beg == end {
                return end;
            }

            let byte = *beg;
            if byte == needle1 || byte == needle2 {
                return beg;
            }

            beg = beg.add(1);
        }
    }
}
|
||||||
|
|
||||||
|
// FWIW, I found that adding support for AVX512 was not useful at the time,
|
||||||
|
// as it only marginally improved file load performance by <5%.
|
||||||
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||||
|
#[target_feature(enable = "avx2")]
|
||||||
|
unsafe fn memchr2_avx2(needle1: u8, needle2: u8, mut beg: *const u8, end: *const u8) -> *const u8 {
|
||||||
|
unsafe {
|
||||||
|
use std::arch::x86_64::*;
|
||||||
|
|
||||||
|
let n1 = _mm256_set1_epi8(needle1 as i8);
|
||||||
|
let n2 = _mm256_set1_epi8(needle2 as i8);
|
||||||
|
let mut remaining = distance(end, beg);
|
||||||
|
|
||||||
|
while remaining >= 32 {
|
||||||
|
let v = _mm256_loadu_si256(beg as *const _);
|
||||||
|
let a = _mm256_cmpeq_epi8(v, n1);
|
||||||
|
let b = _mm256_cmpeq_epi8(v, n2);
|
||||||
|
let c = _mm256_or_si256(a, b);
|
||||||
|
let m = _mm256_movemask_epi8(c) as u32;
|
||||||
|
|
||||||
|
if m != 0 {
|
||||||
|
return beg.add(m.trailing_zeros() as usize);
|
||||||
|
}
|
||||||
|
|
||||||
|
beg = beg.add(32);
|
||||||
|
remaining -= 32;
|
||||||
|
}
|
||||||
|
|
||||||
|
memchr2_fallback(needle1, needle2, beg, end)
|
||||||
|
|
||||||
|
// TODO: This code probably works correctly but requires more testing.
|
||||||
|
/*
|
||||||
|
// Handle the remaining <32 bytes by reading 32 bytes and masking out the irrelevant data.
|
||||||
|
// This works, because x86 does not care about slice boundaries. It does care about page boundaries, however.
|
||||||
|
if remaining > 0 {
|
||||||
|
// Data beyond the beg/end range may not be mapped in. As such, we need to avoid reading beyond the
|
||||||
|
// page boundaries. This assumes 4KiB pages or larger. If we're in the lower half of the 4KiB page,
|
||||||
|
// we load data from `end.sub(off) == end.sub(remaining) == beg`, since we know that this 32-byte read
|
||||||
|
// can't possibly read 2KiB. Otherwise, we load from `end.sub(off) == end.sub(32)`, which essentially
|
||||||
|
// means we read such that the end of the read is aligned with the end of the haystack. The start of the
|
||||||
|
// SIMD register will then contain garbage we must ignore.
|
||||||
|
let off = if ((beg as usize) & 2048) != 0 {
|
||||||
|
32
|
||||||
|
} else {
|
||||||
|
remaining
|
||||||
|
};
|
||||||
|
|
||||||
|
let v = _mm256_loadu_si256(end.sub(off) as *const _);
|
||||||
|
let a = _mm256_cmpeq_epi8(v, n1);
|
||||||
|
let b = _mm256_cmpeq_epi8(v, n2);
|
||||||
|
let c = _mm256_or_si256(a, b);
|
||||||
|
let m = _mm256_movemask_epi8(c) as u32;
|
||||||
|
|
||||||
|
// If we were in the upper half of the 4KiB page, we must shift the mask such that it's not aligned with
|
||||||
|
// the end of the haystack but rather with the current `beg`: A shift of `32 - remaining` is needed,
|
||||||
|
// which equals `off - remaining`. Otherwise, we must not shift at all. Luckily `off` will be `remaining`
|
||||||
|
// in that case and `remaining - remaining` is 0.
|
||||||
|
let m = m >> (off - remaining);
|
||||||
|
|
||||||
|
// If we were in the lower half of the 4KiB page, we must mask out anything beyond the end of
|
||||||
|
// the haystack. Here, we basically restrict the "length" if `m` to contain `remaining`-many bits.
|
||||||
|
// In case of a read in the upper half this won't do anything, but that's fine. Branchless code is great.
|
||||||
|
let m = m & ((1 << remaining) - 1);
|
||||||
|
|
||||||
|
if m != 0 {
|
||||||
|
return beg.add(m.trailing_zeros() as usize);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
end
|
||||||
|
*/
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// NEON implementation of `memchr2_raw`: compares 16 bytes per iteration against both
// needles and returns a pointer to the first match. The remaining <16 bytes are handed
// to `memchr2_fallback`.
#[cfg(target_arch = "aarch64")]
unsafe fn memchr2_neon(needle1: u8, needle2: u8, mut beg: *const u8, end: *const u8) -> *const u8 {
    unsafe {
        use std::arch::aarch64::*;

        let splat1 = vdupq_n_u8(needle1);
        let splat2 = vdupq_n_u8(needle2);

        while distance(end, beg) >= 16 {
            let chunk = vld1q_u8(beg as *const _);
            let hits = vorrq_u8(vceqq_u8(chunk, splat1), vceqq_u8(chunk, splat2));

            // https://community.arm.com/arm-community-blogs/b/servers-and-cloud-computing-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
            // The narrowing shift leaves 4 mask bits per input byte in a single u64.
            let nibbles = vshrn_n_u16(vreinterpretq_u16_u8(hits), 4);
            let mask = vget_lane_u64(vreinterpret_u64_u8(nibbles), 0);

            if mask != 0 {
                // 4 bits per byte, hence the division by 4.
                return beg.add((mask.trailing_zeros() as usize) >> 2);
            }

            beg = beg.add(16);
        }

        memchr2_fallback(needle1, needle2, beg, end)
    }
}
|
||||||
|
|
||||||
|
/// Same as `memchr2`, but searches from the end of the haystack.
|
||||||
|
/// If no needle is found, 0 is returned.
|
||||||
|
///
|
||||||
|
/// *NOTE: Unlike `memchr2` (or `memrchr`), an offset PAST the hit is returned.*
|
||||||
|
/// This is because this function is primarily used for `ucd::newlines_backward`,
|
||||||
|
/// which needs exactly that.
|
||||||
|
pub fn memrchr2(needle1: u8, needle2: u8, haystack: &[u8], offset: usize) -> Option<usize> {
|
||||||
|
unsafe {
|
||||||
|
let beg = haystack.as_ptr();
|
||||||
|
let it = beg.add(offset.min(haystack.len()));
|
||||||
|
let it = memrchr2_raw(needle1, needle2, beg, it);
|
||||||
|
if it.is_null() {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
Some(distance(it, beg))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||||
|
static mut MEMRCHR2_DISPATCH: unsafe fn(
|
||||||
|
needle1: u8,
|
||||||
|
needle2: u8,
|
||||||
|
beg: *const u8,
|
||||||
|
end: *const u8,
|
||||||
|
) -> *const u8 = memrchr2_dispatch;
|
||||||
|
|
||||||
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||||
|
unsafe fn memrchr2_dispatch(needle1: u8, needle2: u8, beg: *const u8, end: *const u8) -> *const u8 {
|
||||||
|
let func = if is_x86_feature_detected!("avx2") {
|
||||||
|
memrchr2_avx2
|
||||||
|
} else {
|
||||||
|
memrchr2_fallback
|
||||||
|
};
|
||||||
|
unsafe { MEMRCHR2_DISPATCH = func };
|
||||||
|
unsafe { func(needle1, needle2, beg, end) }
|
||||||
|
}
|
||||||
|
|
||||||
|
unsafe fn memrchr2_raw(needle1: u8, needle2: u8, beg: *const u8, end: *const u8) -> *const u8 {
|
||||||
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||||
|
return unsafe { MEMRCHR2_DISPATCH(needle1, needle2, beg, end) };
|
||||||
|
|
||||||
|
#[cfg(target_arch = "aarch64")]
|
||||||
|
return unsafe { memrchr2_neon(needle1, needle2, beg, end) };
|
||||||
|
|
||||||
|
#[allow(unreachable_code)]
|
||||||
|
return unsafe { memrchr2_fallback(needle1, needle2, beg, end) };
|
||||||
|
}
|
||||||
|
|
||||||
|
// Scalar implementation: walks `[beg, end)` backwards byte by byte and returns a pointer
// to the last byte equal to either needle, or a null pointer if there is none.
unsafe fn memrchr2_fallback(
    needle1: u8,
    needle2: u8,
    beg: *const u8,
    mut end: *const u8,
) -> *const u8 {
    unsafe {
        loop {
            if end == beg {
                return null();
            }

            end = end.sub(1);
            let byte = *end;
            if byte == needle1 || byte == needle2 {
                return end;
            }
        }
    }
}
|
||||||
|
|
||||||
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||||
|
#[target_feature(enable = "avx2")]
|
||||||
|
unsafe fn memrchr2_avx2(needle1: u8, needle2: u8, beg: *const u8, mut end: *const u8) -> *const u8 {
|
||||||
|
unsafe {
|
||||||
|
use std::arch::x86_64::*;
|
||||||
|
|
||||||
|
if distance(end, beg) >= 32 {
|
||||||
|
let n1 = _mm256_set1_epi8(needle1 as i8);
|
||||||
|
let n2 = _mm256_set1_epi8(needle2 as i8);
|
||||||
|
|
||||||
|
loop {
|
||||||
|
end = end.sub(32);
|
||||||
|
|
||||||
|
let v = _mm256_loadu_si256(end as *const _);
|
||||||
|
let a = _mm256_cmpeq_epi8(v, n1);
|
||||||
|
let b = _mm256_cmpeq_epi8(v, n2);
|
||||||
|
let c = _mm256_or_si256(a, b);
|
||||||
|
let m = _mm256_movemask_epi8(c) as u32;
|
||||||
|
|
||||||
|
if m != 0 {
|
||||||
|
return end.add(31 - m.leading_zeros() as usize);
|
||||||
|
}
|
||||||
|
|
||||||
|
if distance(end, beg) < 32 {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
memrchr2_fallback(needle1, needle2, beg, end)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// NEON implementation of `memrchr2_raw`: walks backwards from `end`, comparing 16 bytes
// per iteration against both needles, and returns a pointer to the last match.
// The remaining <16 bytes at the front are handed to `memrchr2_fallback`.
#[cfg(target_arch = "aarch64")]
unsafe fn memrchr2_neon(needle1: u8, needle2: u8, beg: *const u8, mut end: *const u8) -> *const u8 {
    unsafe {
        use std::arch::aarch64::*;

        let splat1 = vdupq_n_u8(needle1);
        let splat2 = vdupq_n_u8(needle2);

        while distance(end, beg) >= 16 {
            end = end.sub(16);

            let chunk = vld1q_u8(end as *const _);
            let hits = vorrq_u8(vceqq_u8(chunk, splat1), vceqq_u8(chunk, splat2));

            // https://community.arm.com/arm-community-blogs/b/servers-and-cloud-computing-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
            // The narrowing shift leaves 4 mask bits per input byte in a single u64.
            let nibbles = vshrn_n_u16(vreinterpretq_u16_u8(hits), 4);
            let mask = vget_lane_u64(vreinterpret_u64_u8(nibbles), 0);

            if mask != 0 {
                // 4 bits per byte, hence the division by 4.
                return end.add(15 - ((mask.leading_zeros() as usize) >> 2));
            }
        }

        memrchr2_fallback(needle1, needle2, beg, end)
    }
}
|
||||||
|
|
||||||
|
/*pub struct Memchr2<'a> {
|
||||||
|
needle1: u8,
|
||||||
|
needle2: u8,
|
||||||
|
beg: *const u8,
|
||||||
|
end: *const u8,
|
||||||
|
it: *const u8,
|
||||||
|
_marker: PhantomData<&'a [u8]>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> Memchr2<'a> {
|
||||||
|
pub fn new(needle1: u8, needle2: u8, haystack: &'a [u8]) -> Self {
|
||||||
|
Self {
|
||||||
|
needle1,
|
||||||
|
needle2,
|
||||||
|
beg: haystack.as_ptr(),
|
||||||
|
end: unsafe { haystack.as_ptr().add(haystack.len()) },
|
||||||
|
it: haystack.as_ptr(),
|
||||||
|
_marker: PhantomData,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Iterator for Memchr2<'_> {
|
||||||
|
type Item = usize;
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
if self.it.is_null() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
self.it = unsafe { memchr2_raw(self.needle1, self.needle2, self.it, self.end) };
|
||||||
|
if self.it.is_null() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
let idx = unsafe { distance(self.it, self.beg) };
|
||||||
|
self.it = if self.it == self.end {
|
||||||
|
null()
|
||||||
|
} else {
|
||||||
|
unsafe { self.it.add(1) }
|
||||||
|
};
|
||||||
|
Some(idx)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl FusedIterator for Memchr2<'_> {}
|
||||||
|
|
||||||
|
pub struct memrchr2<'a> {
|
||||||
|
needle1: u8,
|
||||||
|
needle2: u8,
|
||||||
|
beg: *const u8,
|
||||||
|
it: *const u8,
|
||||||
|
_marker: PhantomData<&'a [u8]>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> memrchr2<'a> {
|
||||||
|
pub fn new(needle1: u8, needle2: u8, haystack: &'a [u8]) -> Self {
|
||||||
|
Self {
|
||||||
|
needle1,
|
||||||
|
needle2,
|
||||||
|
beg: haystack.as_ptr(),
|
||||||
|
it: unsafe { haystack.as_ptr().add(haystack.len()) },
|
||||||
|
_marker: PhantomData,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Iterator for memrchr2<'_> {
|
||||||
|
type Item = usize;
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
if self.it.is_null() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
self.it = unsafe { memrchr2_raw(self.needle1, self.needle2, self.beg, self.it) };
|
||||||
|
if self.it.is_null() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
let idx = unsafe { distance(self.it, self.beg) };
|
||||||
|
self.it = if self.it == self.beg {
|
||||||
|
null()
|
||||||
|
} else {
|
||||||
|
unsafe { self.it.sub(1) }
|
||||||
|
};
|
||||||
|
Some(idx)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl FusedIterator for memrchr2<'_> {}*/
|
||||||
|
|
||||||
|
// Returns the number of `T` elements between `lo` and `hi`.
// The caller must guarantee `hi >= lo`; a negative distance is undefined behavior.
// Can be replaced with `sub_ptr` once it's stabilized.
#[inline(always)]
unsafe fn distance<T>(hi: *const T, lo: *const T) -> usize {
    unsafe {
        let delta: isize = hi.offset_from(lo);
        match usize::try_from(delta) {
            Ok(count) => count,
            Err(_) => std::hint::unreachable_unchecked(),
        }
    }
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;
    use crate::sys;
    use std::slice;

    // An empty haystack yields `haystack.len()`, i.e. 0.
    #[test]
    fn test_memchr2_empty() {
        let found = memchr2(b'a', b'b', b"", 0);
        assert_eq!(found, 0);
    }

    // An empty haystack never contains a hit.
    #[test]
    fn test_empty() {
        let found = memrchr2(b'a', b'b', b"", 0);
        assert_eq!(found, None);
    }

    // Hits at the start, in the middle and at the end of a 43 byte haystack,
    // as well as a miss.
    #[test]
    fn test_basic() {
        let alphabet = b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
        let haystack = &alphabet[..43];

        let forward = [
            (b'a', b'z', 0),
            (b'p', b'q', 15),
            (b'Q', b'Z', 42),
            (b'0', b'9', haystack.len()),
        ];
        for (n1, n2, expected) in forward {
            assert_eq!(memchr2(n1, n2, haystack, 0), expected);
        }

        let backward = [
            (b'Q', b'P', Some(42)),
            (b'p', b'o', Some(15)),
            (b'a', b'b', Some(1)),
            (b'0', b'9', None),
        ];
        for (n1, n2, expected) in backward {
            assert_eq!(memrchr2(n1, n2, haystack, 43), expected);
        }
    }

    // Test that it doesn't match before/after the start offset respectively.
    #[test]
    fn test_with_offset() {
        let haystack = b"abcdefghabcdefghabcdefghabcdefghabcdefgh";

        let backward = [
            (b'h', b'g', 40, Some(39)),
            (b'h', b'g', 39, Some(38)),
            (b'a', b'b', 9, Some(8)),
            (b'a', b'b', 1, Some(0)),
            (b'a', b'b', 0, None),
        ];
        for (n1, n2, offset, expected) in backward {
            assert_eq!(memrchr2(n1, n2, haystack, offset), expected);
        }

        let forward = [(0, 0), (1, 1), (2, 8), (9, 9), (16, 16), (41, 40)];
        for (offset, expected) in forward {
            assert_eq!(memchr2(b'a', b'b', haystack, offset), expected);
        }
    }

    // Test memory access safety at page boundaries.
    // The test is a success if it doesn't segfault.
    #[test]
    fn test_page_boundary() {
        const PAGE_SIZE: usize = 4096;

        let page = unsafe {
            // 3 pages: uncommitted, committed, uncommitted
            let base = sys::virtual_reserve(PAGE_SIZE * 3).unwrap() as *mut u8;
            let committed = base.add(PAGE_SIZE);
            sys::virtual_commit(committed, PAGE_SIZE).unwrap();
            slice::from_raw_parts_mut(committed, PAGE_SIZE)
        };

        page.fill(b'a');

        let len = page.len();

        // Test if it seeks beyond the page boundary.
        assert_eq!(memchr2(b'\0', b'\0', &page[len - 40..], 0), 40);
        // Test if it seeks before the page boundary for the masked/partial load.
        assert_eq!(memchr2(b'\0', b'\0', &page[..10], 0), 10);

        // Same as above, but for memrchr2 (hence reversed).
        assert_eq!(memrchr2(b'\0', b'\0', &page[len - 10..], 10), None);
        assert_eq!(memrchr2(b'\0', b'\0', &page[..40], 40), None);
    }
}
|
10
src/sys.rs
Normal file
10
src/sys.rs
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
#[cfg(unix)]
|
||||||
|
mod unix;
|
||||||
|
#[cfg(windows)]
|
||||||
|
#[macro_use]
|
||||||
|
mod windows;
|
||||||
|
|
||||||
|
#[cfg(unix)]
|
||||||
|
pub use unix::*;
|
||||||
|
#[cfg(windows)]
|
||||||
|
pub use windows::*;
|
353
src/sys/unix.rs
Normal file
353
src/sys/unix.rs
Normal file
|
@ -0,0 +1,353 @@
|
||||||
|
use crate::apperr;
|
||||||
|
use std::ffi::{CStr, c_int, c_void};
|
||||||
|
use std::fs::File;
|
||||||
|
use std::io::{ErrorKind, Read, Write};
|
||||||
|
use std::mem::{self, ManuallyDrop, MaybeUninit};
|
||||||
|
use std::os::fd::FromRawFd;
|
||||||
|
use std::ptr::{null, null_mut};
|
||||||
|
use std::thread;
|
||||||
|
use std::time;
|
||||||
|
|
||||||
|
/// Collects the user's preferred locales from the environment.
///
/// Reads `LANGUAGE`, `LC_ALL` and `LANG` (in that order), splits each value at `:`
/// and returns all non-empty entries unmodified, in the order they were found.
pub fn preferred_languages() -> Vec<String> {
    let mut locales = Vec::new();

    let values = ["LANGUAGE", "LC_ALL", "LANG"]
        .iter()
        .filter_map(|key| std::env::var(key).ok());

    for value in values {
        for entry in value.split(':') {
            if !entry.is_empty() {
                locales.push(entry.to_string());
            }
        }
    }

    locales
}
|
||||||
|
|
||||||
|
extern "C" fn sigwinch_handler(_: libc::c_int) {
|
||||||
|
unsafe {
|
||||||
|
STATE.inject_resize = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn init() -> apperr::Result<()> {
|
||||||
|
unsafe {
|
||||||
|
// Reopen stdin/stdout if they're redirected.
|
||||||
|
if libc::isatty(STATE.stdin) == 0 {
|
||||||
|
STATE.stdin = check_int_return(libc::open(c"/dev/tty".as_ptr(), libc::O_RDONLY))?;
|
||||||
|
}
|
||||||
|
if libc::isatty(STATE.stdout) == 0 {
|
||||||
|
STATE.stdout = check_int_return(libc::open(c"/dev/tty".as_ptr(), libc::O_WRONLY))?;
|
||||||
|
}
|
||||||
|
|
||||||
|
check_int_return(libc::tcgetattr(
|
||||||
|
STATE.stdout,
|
||||||
|
&raw mut STATE.stdout_initial_termios,
|
||||||
|
))?;
|
||||||
|
|
||||||
|
let mut termios = STATE.stdout_initial_termios;
|
||||||
|
termios.c_lflag &= !(libc::ICANON | libc::ECHO);
|
||||||
|
check_int_return(libc::tcsetattr(STATE.stdout, libc::TCSANOW, &termios))?;
|
||||||
|
|
||||||
|
// Set STATE.inject_resize to true whenever we get a SIGWINCH.
|
||||||
|
let mut sigwinch_action: libc::sigaction = mem::zeroed();
|
||||||
|
sigwinch_action.sa_sigaction = sigwinch_handler as libc::sighandler_t;
|
||||||
|
check_int_return(libc::sigaction(
|
||||||
|
libc::SIGWINCH,
|
||||||
|
&sigwinch_action,
|
||||||
|
null_mut(),
|
||||||
|
))?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn deinit() {
|
||||||
|
unsafe {
|
||||||
|
libc::tcsetattr(
|
||||||
|
STATE.stdout,
|
||||||
|
libc::TCSANOW,
|
||||||
|
&raw mut STATE.stdout_initial_termios,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn inject_window_size_into_stdin() {
|
||||||
|
unsafe {
|
||||||
|
STATE.inject_resize = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_window_size() -> (u16, u16) {
|
||||||
|
let mut w = 0;
|
||||||
|
let mut h = 0;
|
||||||
|
|
||||||
|
for attempt in 1.. {
|
||||||
|
let winsz = unsafe {
|
||||||
|
let mut winsz: libc::winsize = mem::zeroed();
|
||||||
|
libc::ioctl(STATE.stdout, libc::TIOCGWINSZ, &raw mut winsz);
|
||||||
|
winsz
|
||||||
|
};
|
||||||
|
|
||||||
|
w = winsz.ws_col;
|
||||||
|
h = winsz.ws_row;
|
||||||
|
if w != 0 && h != 0 {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if attempt == 10 {
|
||||||
|
w = 80;
|
||||||
|
h = 24;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Some terminals are bad emulators and don't report TIOCGWINSZ immediately.
|
||||||
|
thread::sleep(time::Duration::from_millis(10 * attempt));
|
||||||
|
}
|
||||||
|
|
||||||
|
(w, h)
|
||||||
|
}
|
||||||
|
|
||||||
|
struct State {
|
||||||
|
stdin: libc::c_int,
|
||||||
|
stdout: libc::c_int,
|
||||||
|
stdout_initial_termios: libc::termios,
|
||||||
|
inject_resize: bool,
|
||||||
|
// Buffer for incomplete UTF-8 sequences (max 4 bytes needed)
|
||||||
|
utf8_buf: [u8; 4],
|
||||||
|
utf8_len: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
static mut STATE: State = State {
|
||||||
|
stdin: libc::STDIN_FILENO,
|
||||||
|
stdout: libc::STDOUT_FILENO,
|
||||||
|
stdout_initial_termios: unsafe { mem::zeroed() },
|
||||||
|
inject_resize: false,
|
||||||
|
utf8_buf: [0; 4],
|
||||||
|
utf8_len: 0,
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Reads from stdin.
|
||||||
|
///
|
||||||
|
/// Returns `None` if there was an error reading from stdin.
|
||||||
|
/// Returns `Some("")` if the given timeout was reached.
|
||||||
|
/// Otherwise, it returns the read, non-empty string.
|
||||||
|
pub fn read_stdin(timeout: Option<time::Duration>) -> Option<String> {
|
||||||
|
unsafe {
|
||||||
|
if let Some(timeout) = timeout {
|
||||||
|
let mut pollfd = libc::pollfd {
|
||||||
|
fd: STATE.stdin,
|
||||||
|
events: libc::POLLIN,
|
||||||
|
revents: 0,
|
||||||
|
};
|
||||||
|
let ts = libc::timespec {
|
||||||
|
tv_sec: timeout.as_secs() as libc::time_t,
|
||||||
|
tv_nsec: timeout.subsec_nanos() as libc::c_long,
|
||||||
|
};
|
||||||
|
let ret = libc::ppoll(&mut pollfd, 1, &ts, null());
|
||||||
|
if ret < 0 {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
if ret == 0 {
|
||||||
|
return Some(String::new());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[allow(invalid_value)]
|
||||||
|
let mut buf: [u8; 1024] = MaybeUninit::uninit().assume_init();
|
||||||
|
let mut read = 0;
|
||||||
|
|
||||||
|
if STATE.utf8_len != 0 {
|
||||||
|
read = STATE.utf8_len;
|
||||||
|
input[..read].copy_from_slice(&STATE.utf8_buf[..read]);
|
||||||
|
}
|
||||||
|
|
||||||
|
loop {
|
||||||
|
if STATE.inject_resize {
|
||||||
|
STATE.inject_resize = false;
|
||||||
|
let (w, h) = get_window_size();
|
||||||
|
return Some(format!("\x1b[8;{};{}t", h, w));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read new data
|
||||||
|
let n = loop {
|
||||||
|
let ret = libc::read(STATE.stdin, buf.as_mut_ptr() as *mut _, buf.len());
|
||||||
|
if ret > 0 {
|
||||||
|
break ret as usize;
|
||||||
|
}
|
||||||
|
if ret == 0 {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
if *libc::__errno_location() != libc::EINTR {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Prepend any cached incomplete UTF-8 sequence
|
||||||
|
let input = if STATE.utf8_len > 0 {
|
||||||
|
let total = STATE.utf8_len + n;
|
||||||
|
let mut combined = Vec::with_capacity(total);
|
||||||
|
combined.extend_from_slice(&STATE.utf8_buf[..STATE.utf8_len]);
|
||||||
|
combined.extend_from_slice(&buf[..n]);
|
||||||
|
STATE.utf8_len = 0;
|
||||||
|
combined
|
||||||
|
} else {
|
||||||
|
buf[..n].to_vec()
|
||||||
|
};
|
||||||
|
|
||||||
|
// Find last complete UTF-8 sequence
|
||||||
|
let mut valid_end = input.len();
|
||||||
|
while valid_end > 0 && (input[valid_end - 1] & 0xC0) == 0x80 {
|
||||||
|
valid_end -= 1;
|
||||||
|
if input.len() - valid_end >= 4 || valid_end == 0 {
|
||||||
|
// Either too many trail bytes or all trail bytes - invalid UTF-8
|
||||||
|
valid_end = input.len();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Cache incomplete sequence if any
|
||||||
|
if valid_end < input.len() {
|
||||||
|
let remaining = input.len() - valid_end;
|
||||||
|
STATE.utf8_buf[..remaining].copy_from_slice(&input[valid_end..]);
|
||||||
|
STATE.utf8_len = remaining;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert valid portion to string
|
||||||
|
if let Ok(s) = String::from_utf8(input[..valid_end].to_vec()) {
|
||||||
|
if !s.is_empty() {
|
||||||
|
return Some(s);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn write_stdout(text: &str) {
|
||||||
|
let buf = text.as_bytes();
|
||||||
|
let mut written = 0;
|
||||||
|
|
||||||
|
while written < buf.len() {
|
||||||
|
let w = &buf[written..];
|
||||||
|
let n = unsafe { libc::write(STATE.stdout, w.as_ptr() as *const _, w.len()) };
|
||||||
|
|
||||||
|
if n >= 0 {
|
||||||
|
written += n as usize;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
let err = unsafe { *libc::__errno_location() };
|
||||||
|
if err != libc::EINTR {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn open_stdin_if_redirected() -> Option<File> {
|
||||||
|
unsafe {
|
||||||
|
if libc::isatty(libc::STDIN_FILENO) == 0 {
|
||||||
|
Some(File::from_raw_fd(libc::STDIN_FILENO))
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub unsafe fn virtual_reserve(size: usize) -> apperr::Result<*mut u8> {
|
||||||
|
unsafe {
|
||||||
|
let ptr = libc::mmap(
|
||||||
|
null_mut(),
|
||||||
|
size,
|
||||||
|
libc::PROT_NONE,
|
||||||
|
libc::MAP_PRIVATE | libc::MAP_ANONYMOUS,
|
||||||
|
-1,
|
||||||
|
0,
|
||||||
|
);
|
||||||
|
if ptr == libc::MAP_FAILED {
|
||||||
|
Err(apperr::Error::new(libc::ENOMEM as u32))
|
||||||
|
} else {
|
||||||
|
Ok(ptr as *mut u8)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub unsafe fn virtual_release(base: *mut u8, size: usize) {
|
||||||
|
unsafe {
|
||||||
|
libc::munmap(base as *mut libc::c_void, size);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub unsafe fn virtual_commit(base: *mut u8, size: usize) -> apperr::Result<()> {
|
||||||
|
unsafe {
|
||||||
|
let status = libc::mprotect(
|
||||||
|
base as *mut libc::c_void,
|
||||||
|
size,
|
||||||
|
libc::PROT_READ | libc::PROT_WRITE,
|
||||||
|
);
|
||||||
|
if status != 0 {
|
||||||
|
Err(apperr::Error::new(libc::ENOMEM as u32))
|
||||||
|
} else {
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
unsafe fn load_library(name: &CStr) -> apperr::Result<*mut c_void> {
|
||||||
|
unsafe {
|
||||||
|
let handle = libc::dlopen(name.as_ptr(), libc::RTLD_LAZY);
|
||||||
|
if handle.is_null() {
|
||||||
|
Err(apperr::Error::new(libc::ELIBACC as u32))
|
||||||
|
} else {
|
||||||
|
Ok(handle)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// It'd be nice to constrain T to std::marker::FnPtr, but that's unstable.
|
||||||
|
pub unsafe fn get_proc_address<T>(handle: *mut c_void, name: &CStr) -> apperr::Result<T> {
|
||||||
|
unsafe {
|
||||||
|
let sym = libc::dlsym(handle, name.as_ptr());
|
||||||
|
if sym.is_null() {
|
||||||
|
Err(apperr::Error::new(libc::ELIBACC as u32))
|
||||||
|
} else {
|
||||||
|
Ok(mem::transmute_copy(&sym))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub unsafe fn load_icu() -> apperr::Result<*mut c_void> {
|
||||||
|
unsafe { load_library(c"icu.dll") }
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
pub fn io_error_to_apperr(err: std::io::Error) -> apperr::Error {
|
||||||
|
unsafe { apperr::Error::new(err.raw_os_error().unwrap_or(0) as u32) }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn format_error(err: apperr::Error) -> String {
|
||||||
|
let errno = err.value() & 0xFFFF;
|
||||||
|
let mut result = format!("Error {}", errno);
|
||||||
|
|
||||||
|
unsafe {
|
||||||
|
let ptr = libc::strerror(errno as i32);
|
||||||
|
if !ptr.is_null() {
|
||||||
|
let msg = CStr::from_ptr(ptr).to_string_lossy();
|
||||||
|
result.push_str(": ");
|
||||||
|
result.push_str(&msg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
result
|
||||||
|
}
|
||||||
|
|
||||||
|
fn errno_to_apperr(no: c_int) -> apperr::Error {
|
||||||
|
unsafe { apperr::Error::new(no.max(1) as u32) }
|
||||||
|
}
|
||||||
|
|
||||||
|
fn check_int_return(ret: libc::c_int) -> apperr::Result<libc::c_int> {
|
||||||
|
if ret < 0 {
|
||||||
|
Err(errno_to_apperr(unsafe { *libc::__errno_location() }))
|
||||||
|
} else {
|
||||||
|
Ok(ret)
|
||||||
|
}
|
||||||
|
}
|
524
src/sys/windows.rs
Normal file
524
src/sys/windows.rs
Normal file
|
@ -0,0 +1,524 @@
|
||||||
|
use crate::helpers::{CoordType, Size};
|
||||||
|
use crate::{apperr, helpers};
|
||||||
|
use std::ffi::CStr;
|
||||||
|
use std::fmt::Write as _;
|
||||||
|
use std::fs::File;
|
||||||
|
use std::mem::MaybeUninit;
|
||||||
|
use std::os::windows::io::FromRawHandle;
|
||||||
|
use std::ptr::{null, null_mut};
|
||||||
|
use std::{mem, time};
|
||||||
|
use windows_sys::Win32::Foundation;
|
||||||
|
use windows_sys::Win32::Globalization;
|
||||||
|
use windows_sys::Win32::Storage::FileSystem;
|
||||||
|
use windows_sys::Win32::System::Console;
|
||||||
|
use windows_sys::Win32::System::Diagnostics::Debug;
|
||||||
|
use windows_sys::Win32::System::IO;
|
||||||
|
use windows_sys::Win32::System::LibraryLoader;
|
||||||
|
use windows_sys::Win32::System::Memory;
|
||||||
|
use windows_sys::Win32::System::Threading;
|
||||||
|
use windows_sys::w;
|
||||||
|
|
||||||
|
pub fn preferred_languages() -> Vec<String> {
|
||||||
|
unsafe {
|
||||||
|
const LEN: usize = 256;
|
||||||
|
|
||||||
|
let mut lang_num = 0;
|
||||||
|
let mut lang_buf = [const { MaybeUninit::<u16>::uninit() }; LEN];
|
||||||
|
let mut lang_buf_len = lang_buf.len() as u32;
|
||||||
|
if Globalization::GetUserPreferredUILanguages(
|
||||||
|
Globalization::MUI_LANGUAGE_NAME,
|
||||||
|
&mut lang_num,
|
||||||
|
lang_buf[0].as_mut_ptr(),
|
||||||
|
&mut lang_buf_len,
|
||||||
|
) == 0
|
||||||
|
|| lang_num == 0
|
||||||
|
{
|
||||||
|
return Vec::new();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Drop the terminating double-null character.
|
||||||
|
lang_buf_len = lang_buf_len.saturating_sub(1);
|
||||||
|
|
||||||
|
let mut lang_buf_utf8 = [const { MaybeUninit::<u8>::uninit() }; 3 * LEN];
|
||||||
|
let lang_buf_utf8_len = Globalization::WideCharToMultiByte(
|
||||||
|
Globalization::CP_UTF8,
|
||||||
|
0,
|
||||||
|
lang_buf[0].as_mut_ptr(),
|
||||||
|
lang_buf_len as i32,
|
||||||
|
lang_buf_utf8[0].as_mut_ptr(),
|
||||||
|
lang_buf_utf8.len() as i32,
|
||||||
|
null(),
|
||||||
|
null_mut(),
|
||||||
|
);
|
||||||
|
if lang_buf_utf8_len == 0 {
|
||||||
|
return Vec::new();
|
||||||
|
}
|
||||||
|
|
||||||
|
let result = helpers::str_from_raw_parts_mut(
|
||||||
|
lang_buf_utf8[0].as_mut_ptr(),
|
||||||
|
lang_buf_utf8_len as usize,
|
||||||
|
);
|
||||||
|
result.make_ascii_lowercase();
|
||||||
|
result.split_terminator('\0').map(String::from).collect()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
type ReadConsoleInputExW = unsafe extern "system" fn(
|
||||||
|
h_console_input: Foundation::HANDLE,
|
||||||
|
lp_buffer: *mut Console::INPUT_RECORD,
|
||||||
|
n_length: u32,
|
||||||
|
lp_number_of_events_read: *mut u32,
|
||||||
|
w_flags: u16,
|
||||||
|
) -> Foundation::BOOL;
|
||||||
|
|
||||||
|
const CONSOLE_READ_NOWAIT: u16 = 0x0002;
|
||||||
|
|
||||||
|
struct State {
|
||||||
|
read_console_input_ex: ReadConsoleInputExW,
|
||||||
|
stdin: Foundation::HANDLE,
|
||||||
|
stdout: Foundation::HANDLE,
|
||||||
|
stdin_cp_old: u32,
|
||||||
|
stdout_cp_old: u32,
|
||||||
|
stdin_mode_old: u32,
|
||||||
|
stdout_mode_old: u32,
|
||||||
|
leading_surrogate: u16,
|
||||||
|
inject_resize: bool,
|
||||||
|
wants_exit: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
static mut STATE: State = State {
|
||||||
|
read_console_input_ex: read_console_input_ex_placeholder,
|
||||||
|
stdin: null_mut(),
|
||||||
|
stdout: null_mut(),
|
||||||
|
stdin_cp_old: 0,
|
||||||
|
stdout_cp_old: 0,
|
||||||
|
stdin_mode_old: 0,
|
||||||
|
stdout_mode_old: 0,
|
||||||
|
leading_surrogate: 0,
|
||||||
|
inject_resize: false,
|
||||||
|
wants_exit: false,
|
||||||
|
};
|
||||||
|
|
||||||
|
unsafe extern "system" fn read_console_input_ex_placeholder(
|
||||||
|
_: Foundation::HANDLE,
|
||||||
|
_: *mut Console::INPUT_RECORD,
|
||||||
|
_: u32,
|
||||||
|
_: *mut u32,
|
||||||
|
_: u16,
|
||||||
|
) -> Foundation::BOOL {
|
||||||
|
panic!();
|
||||||
|
}
|
||||||
|
|
||||||
|
extern "system" fn console_ctrl_handler(_ctrl_type: u32) -> Foundation::BOOL {
|
||||||
|
unsafe {
|
||||||
|
STATE.wants_exit = true;
|
||||||
|
IO::CancelIoEx(STATE.stdin, null());
|
||||||
|
}
|
||||||
|
1
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn init() -> apperr::Result<()> {
|
||||||
|
unsafe {
|
||||||
|
let kernel32 = LibraryLoader::GetModuleHandleW(w!("kernel32.dll"));
|
||||||
|
STATE.read_console_input_ex = get_proc_address(kernel32, c"ReadConsoleInputExW")?;
|
||||||
|
|
||||||
|
check_bool_return(Console::SetConsoleCtrlHandler(
|
||||||
|
Some(console_ctrl_handler),
|
||||||
|
1,
|
||||||
|
))?;
|
||||||
|
|
||||||
|
STATE.stdin = FileSystem::CreateFileW(
|
||||||
|
w!("CONIN$"),
|
||||||
|
Foundation::GENERIC_READ | Foundation::GENERIC_WRITE,
|
||||||
|
FileSystem::FILE_SHARE_READ | FileSystem::FILE_SHARE_WRITE,
|
||||||
|
null_mut(),
|
||||||
|
FileSystem::OPEN_EXISTING,
|
||||||
|
0,
|
||||||
|
null_mut(),
|
||||||
|
);
|
||||||
|
STATE.stdout = FileSystem::CreateFileW(
|
||||||
|
w!("CONOUT$"),
|
||||||
|
Foundation::GENERIC_READ | Foundation::GENERIC_WRITE,
|
||||||
|
FileSystem::FILE_SHARE_READ | FileSystem::FILE_SHARE_WRITE,
|
||||||
|
null_mut(),
|
||||||
|
FileSystem::OPEN_EXISTING,
|
||||||
|
0,
|
||||||
|
null_mut(),
|
||||||
|
);
|
||||||
|
if STATE.stdin == Foundation::INVALID_HANDLE_VALUE
|
||||||
|
|| STATE.stdout == Foundation::INVALID_HANDLE_VALUE
|
||||||
|
{
|
||||||
|
return Err(get_last_error());
|
||||||
|
}
|
||||||
|
|
||||||
|
STATE.stdin_cp_old = Console::GetConsoleCP();
|
||||||
|
STATE.stdout_cp_old = Console::GetConsoleOutputCP();
|
||||||
|
check_bool_return(Console::GetConsoleMode(
|
||||||
|
STATE.stdin,
|
||||||
|
&raw mut STATE.stdin_mode_old,
|
||||||
|
))?;
|
||||||
|
check_bool_return(Console::GetConsoleMode(
|
||||||
|
STATE.stdout,
|
||||||
|
&raw mut STATE.stdout_mode_old,
|
||||||
|
))?;
|
||||||
|
|
||||||
|
check_bool_return(Console::SetConsoleCP(Globalization::CP_UTF8))?;
|
||||||
|
check_bool_return(Console::SetConsoleOutputCP(Globalization::CP_UTF8))?;
|
||||||
|
check_bool_return(Console::SetConsoleMode(
|
||||||
|
STATE.stdin,
|
||||||
|
Console::ENABLE_WINDOW_INPUT
|
||||||
|
| Console::ENABLE_EXTENDED_FLAGS
|
||||||
|
| Console::ENABLE_VIRTUAL_TERMINAL_INPUT,
|
||||||
|
))?;
|
||||||
|
check_bool_return(Console::SetConsoleMode(
|
||||||
|
STATE.stdout,
|
||||||
|
Console::ENABLE_PROCESSED_OUTPUT
|
||||||
|
| Console::ENABLE_WRAP_AT_EOL_OUTPUT
|
||||||
|
| Console::ENABLE_VIRTUAL_TERMINAL_PROCESSING
|
||||||
|
| Console::DISABLE_NEWLINE_AUTO_RETURN,
|
||||||
|
))?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn deinit() {
|
||||||
|
unsafe {
|
||||||
|
Console::SetConsoleCP(STATE.stdin_cp_old);
|
||||||
|
Console::SetConsoleOutputCP(STATE.stdout_cp_old);
|
||||||
|
Console::SetConsoleMode(STATE.stdin, STATE.stdin_mode_old);
|
||||||
|
Console::SetConsoleMode(STATE.stdout, STATE.stdout_mode_old);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn inject_window_size_into_stdin() {
|
||||||
|
unsafe {
|
||||||
|
STATE.inject_resize = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_console_size() -> Option<Size> {
|
||||||
|
unsafe {
|
||||||
|
let mut info: Console::CONSOLE_SCREEN_BUFFER_INFOEX = mem::zeroed();
|
||||||
|
info.cbSize = mem::size_of::<Console::CONSOLE_SCREEN_BUFFER_INFOEX>() as u32;
|
||||||
|
if Console::GetConsoleScreenBufferInfoEx(STATE.stdout, &mut info) == 0 {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
let w = (info.srWindow.Right - info.srWindow.Left + 1).max(1) as CoordType;
|
||||||
|
let h = (info.srWindow.Bottom - info.srWindow.Top + 1).max(1) as CoordType;
|
||||||
|
Some(Size {
|
||||||
|
width: w,
|
||||||
|
height: h,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Reads from stdin.
|
||||||
|
///
|
||||||
|
/// Returns `None` if there was an error reading from stdin.
|
||||||
|
/// Returns `Some("")` if the given timeout was reached.
|
||||||
|
/// Otherwise, it returns the read, non-empty string.
|
||||||
|
pub fn read_stdin(timeout: Option<time::Duration>) -> Option<String> {
|
||||||
|
let mut input_buf = [const { MaybeUninit::<Console::INPUT_RECORD>::uninit() }; 1024];
|
||||||
|
let mut input_buf_cap = input_buf.len();
|
||||||
|
let mut utf16_buf = [const { MaybeUninit::<u16>::uninit() }; 1024];
|
||||||
|
let mut utf16_buf_len = 0;
|
||||||
|
let mut resize_event = None;
|
||||||
|
let mut read_more = true;
|
||||||
|
let mut read_poll = timeout.is_some();
|
||||||
|
|
||||||
|
if unsafe { STATE.inject_resize } {
|
||||||
|
resize_event = get_console_size();
|
||||||
|
read_poll = true;
|
||||||
|
unsafe { STATE.inject_resize = false };
|
||||||
|
}
|
||||||
|
|
||||||
|
if unsafe { STATE.leading_surrogate } != 0 {
|
||||||
|
utf16_buf[0] = MaybeUninit::new(unsafe { STATE.leading_surrogate });
|
||||||
|
utf16_buf_len = 1;
|
||||||
|
input_buf_cap -= 1;
|
||||||
|
unsafe { STATE.leading_surrogate = 0 };
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(timeout) = timeout {
|
||||||
|
let wait_result =
|
||||||
|
unsafe { Threading::WaitForSingleObject(STATE.stdin, timeout.as_millis() as u32) };
|
||||||
|
match wait_result {
|
||||||
|
// Ready to read? Continue with reading below.
|
||||||
|
// `read_more` is already true to ensure we don't block.
|
||||||
|
Foundation::WAIT_OBJECT_0 => {}
|
||||||
|
// Timeout? Skip reading entirely.
|
||||||
|
Foundation::WAIT_TIMEOUT => read_more = false,
|
||||||
|
// Error? Tell the caller stdin is broken.
|
||||||
|
_ => return None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// This loops exists, just in case there's events in the input buffer that we aren't interested in.
|
||||||
|
// It should be rare for this to loop.
|
||||||
|
while read_more {
|
||||||
|
let input = unsafe {
|
||||||
|
// If we had a `inject_resize`, we don't want to block indefinitely for other pending input on startup,
|
||||||
|
// but are still interested in any other pending input that may be waiting for us.
|
||||||
|
let flags = if read_poll { CONSOLE_READ_NOWAIT } else { 0 };
|
||||||
|
let mut read = 0;
|
||||||
|
let ok = (STATE.read_console_input_ex)(
|
||||||
|
STATE.stdin,
|
||||||
|
input_buf[0].as_mut_ptr(),
|
||||||
|
input_buf_cap as u32,
|
||||||
|
&mut read,
|
||||||
|
flags,
|
||||||
|
);
|
||||||
|
if ok == 0 || STATE.wants_exit {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
&*(&input_buf[..read as usize] as *const _ as *const [Console::INPUT_RECORD])
|
||||||
|
};
|
||||||
|
|
||||||
|
for inp in input {
|
||||||
|
match inp.EventType as u32 {
|
||||||
|
Console::KEY_EVENT => {
|
||||||
|
let event = unsafe { &inp.Event.KeyEvent };
|
||||||
|
let ch = unsafe { event.uChar.UnicodeChar };
|
||||||
|
if event.bKeyDown != 0 && ch != 0 {
|
||||||
|
utf16_buf[utf16_buf_len] = MaybeUninit::new(ch);
|
||||||
|
utf16_buf_len += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Console::WINDOW_BUFFER_SIZE_EVENT => {
|
||||||
|
let event = unsafe { &inp.Event.WindowBufferSizeEvent };
|
||||||
|
let w = event.dwSize.X as CoordType;
|
||||||
|
let h = event.dwSize.Y as CoordType;
|
||||||
|
// Windows is prone to sending broken/useless `WINDOW_BUFFER_SIZE_EVENT`s.
|
||||||
|
// E.g. starting conhost will emit 3 in a row. Skip rendering in that case.
|
||||||
|
if w > 0 && h > 0 {
|
||||||
|
resize_event = Some(Size {
|
||||||
|
width: w,
|
||||||
|
height: h,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
read_more = !resize_event.is_some() && utf16_buf_len == 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
const RESIZE_EVENT_FMT_MAX_LEN: usize = 16; // "\x1b[8;65535;65535t"
|
||||||
|
let resize_event_len = if resize_event.is_some() {
|
||||||
|
RESIZE_EVENT_FMT_MAX_LEN
|
||||||
|
} else {
|
||||||
|
0
|
||||||
|
};
|
||||||
|
// +1 to account for a potential `STATE.leading_surrogate`.
|
||||||
|
let utf8_max_len = (utf16_buf_len + 1) * 3;
|
||||||
|
let mut text = String::with_capacity(utf8_max_len + resize_event_len);
|
||||||
|
|
||||||
|
if let Some(resize_event) = resize_event {
|
||||||
|
// If I read xterm's documentation correctly, CSI 18 t reports the window size in characters.
|
||||||
|
// CSI 8 ; height ; width t is the response. Of course, we didn't send the request,
|
||||||
|
// but we can use this fake response to trigger the editor to resize itself.
|
||||||
|
_ = write!(
|
||||||
|
text,
|
||||||
|
"\x1b[8;{};{}t",
|
||||||
|
resize_event.height, resize_event.width
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// If the input ends with a lone lead surrogate, we need to remember it for the next read.
|
||||||
|
if utf16_buf_len > 0 {
|
||||||
|
unsafe {
|
||||||
|
let last_char = utf16_buf[utf16_buf_len - 1].assume_init();
|
||||||
|
if 0xD800 <= last_char && last_char <= 0xDBFF {
|
||||||
|
STATE.leading_surrogate = last_char;
|
||||||
|
utf16_buf_len -= 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert the remaining input to UTF8, the sane encoding.
|
||||||
|
if utf16_buf_len > 0 {
|
||||||
|
unsafe {
|
||||||
|
let vec = text.as_mut_vec();
|
||||||
|
let spare = vec.spare_capacity_mut();
|
||||||
|
|
||||||
|
let len = Globalization::WideCharToMultiByte(
|
||||||
|
Globalization::CP_UTF8,
|
||||||
|
0,
|
||||||
|
utf16_buf[0].as_ptr(),
|
||||||
|
utf16_buf_len as i32,
|
||||||
|
spare.as_mut_ptr() as *mut _,
|
||||||
|
spare.len() as i32,
|
||||||
|
null(),
|
||||||
|
null_mut(),
|
||||||
|
);
|
||||||
|
|
||||||
|
if len > 0 {
|
||||||
|
vec.set_len(vec.len() + len as usize);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Some(text)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn write_stdout(text: &str) {
|
||||||
|
unsafe {
|
||||||
|
let mut offset = 0;
|
||||||
|
|
||||||
|
while offset < text.len() {
|
||||||
|
let ptr = text.as_ptr().add(offset);
|
||||||
|
let write = (text.len() - offset).min(1024 * 1024 * 1024) as u32;
|
||||||
|
let mut written = 0;
|
||||||
|
let ok = FileSystem::WriteFile(STATE.stdout, ptr, write, &mut written, null_mut());
|
||||||
|
offset += written as usize;
|
||||||
|
if ok == 0 || written == 0 {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn open_stdin_if_redirected() -> Option<File> {
|
||||||
|
unsafe {
|
||||||
|
let handle = Console::GetStdHandle(Console::STD_INPUT_HANDLE);
|
||||||
|
match FileSystem::GetFileType(handle) {
|
||||||
|
FileSystem::FILE_TYPE_DISK | FileSystem::FILE_TYPE_PIPE => {
|
||||||
|
Some(File::from_raw_handle(handle))
|
||||||
|
}
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub unsafe fn virtual_reserve(size: usize) -> apperr::Result<*mut u8> {
|
||||||
|
unsafe {
|
||||||
|
let mut base = null_mut();
|
||||||
|
|
||||||
|
if cfg!(debug_assertions) {
|
||||||
|
static mut S_BASE_GEN: usize = 0x0000100000000000;
|
||||||
|
S_BASE_GEN += 0x0000100000000000;
|
||||||
|
base = S_BASE_GEN as *mut _;
|
||||||
|
}
|
||||||
|
|
||||||
|
check_ptr_return(Memory::VirtualAlloc(
|
||||||
|
base,
|
||||||
|
size,
|
||||||
|
Memory::MEM_RESERVE,
|
||||||
|
Memory::PAGE_READWRITE,
|
||||||
|
) as *mut u8)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub unsafe fn virtual_release(base: *mut u8, size: usize) {
|
||||||
|
unsafe {
|
||||||
|
Memory::VirtualFree(base as *mut _, size, Memory::MEM_RELEASE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub unsafe fn virtual_commit(base: *mut u8, size: usize) -> apperr::Result<()> {
|
||||||
|
unsafe {
|
||||||
|
check_ptr_return(Memory::VirtualAlloc(
|
||||||
|
base as *mut _,
|
||||||
|
size,
|
||||||
|
Memory::MEM_COMMIT,
|
||||||
|
Memory::PAGE_READWRITE,
|
||||||
|
))
|
||||||
|
.map(|_| ())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
unsafe fn load_library(name: *const u16) -> apperr::Result<Foundation::HMODULE> {
|
||||||
|
unsafe {
|
||||||
|
check_ptr_return(LibraryLoader::LoadLibraryExW(
|
||||||
|
name,
|
||||||
|
null_mut(),
|
||||||
|
LibraryLoader::LOAD_LIBRARY_SEARCH_SYSTEM32,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// It'd be nice to constrain T to std::marker::FnPtr, but that's unstable.
|
||||||
|
pub unsafe fn get_proc_address<T>(handle: Foundation::HMODULE, name: &CStr) -> apperr::Result<T> {
|
||||||
|
unsafe {
|
||||||
|
let ptr = LibraryLoader::GetProcAddress(handle, name.as_ptr() as *const u8);
|
||||||
|
if let Some(ptr) = ptr {
|
||||||
|
Ok(mem::transmute_copy(&ptr))
|
||||||
|
} else {
|
||||||
|
Err(get_last_error())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub unsafe fn load_icu() -> apperr::Result<Foundation::HMODULE> {
|
||||||
|
unsafe { load_library(w!("icu.dll")) }
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cold]
|
||||||
|
fn get_last_error() -> apperr::Error {
|
||||||
|
unsafe { gle_to_apperr(Foundation::GetLastError()) }
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn gle_to_apperr(gle: u32) -> apperr::Error {
|
||||||
|
unsafe {
|
||||||
|
apperr::Error::new(if gle == 0 {
|
||||||
|
0x8000FFFF
|
||||||
|
} else {
|
||||||
|
0x80070000 | gle
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
pub fn io_error_to_apperr(err: std::io::Error) -> apperr::Error {
|
||||||
|
gle_to_apperr(err.raw_os_error().unwrap_or(0) as u32)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn format_error(err: apperr::Error) -> String {
|
||||||
|
unsafe {
|
||||||
|
let mut ptr: *mut u8 = null_mut();
|
||||||
|
let len = Debug::FormatMessageA(
|
||||||
|
Debug::FORMAT_MESSAGE_ALLOCATE_BUFFER
|
||||||
|
| Debug::FORMAT_MESSAGE_FROM_SYSTEM
|
||||||
|
| Debug::FORMAT_MESSAGE_IGNORE_INSERTS,
|
||||||
|
null(),
|
||||||
|
err.value() as u32,
|
||||||
|
0,
|
||||||
|
&mut ptr as *mut *mut _ as *mut _,
|
||||||
|
0,
|
||||||
|
null_mut(),
|
||||||
|
);
|
||||||
|
|
||||||
|
let mut result = format!("Error {:#08x}", err.value());
|
||||||
|
|
||||||
|
if len > 0 {
|
||||||
|
let msg = helpers::str_from_raw_parts(ptr, len as usize);
|
||||||
|
let msg = msg.trim_ascii();
|
||||||
|
let msg = msg.replace(['\r', '\n'], " ");
|
||||||
|
result.push_str(": ");
|
||||||
|
result.push_str(&msg);
|
||||||
|
Foundation::LocalFree(ptr as *mut _);
|
||||||
|
}
|
||||||
|
|
||||||
|
result
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn check_bool_return(ret: Foundation::BOOL) -> apperr::Result<()> {
|
||||||
|
if ret == 0 {
|
||||||
|
Err(get_last_error())
|
||||||
|
} else {
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn check_ptr_return<T>(ret: *mut T) -> apperr::Result<*mut T> {
|
||||||
|
if ret.is_null() {
|
||||||
|
Err(get_last_error())
|
||||||
|
} else {
|
||||||
|
Ok(ret)
|
||||||
|
}
|
||||||
|
}
|
7
src/trust_me_bro.rs
Normal file
7
src/trust_me_bro.rs
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
/// Returns `x` with an arbitrary, caller-chosen lifetime `'a`.
///
/// This bypasses the borrow checker entirely: despite being a safe
/// function, the caller must make sure the referent really outlives `'a`.
pub fn this_lifetime_change_is_totally_safe<'a, T: ?Sized>(x: &T) -> &'a T {
    let ptr = x as *const T;
    unsafe { &*ptr }
}
|
||||||
|
|
||||||
|
/// Returns `x` with an arbitrary, caller-chosen lifetime `'a`.
///
/// This bypasses the borrow checker entirely: despite being a safe
/// function, the caller must make sure the referent really outlives `'a`
/// and that the returned reference stays the only way it is accessed.
pub fn this_lifetime_change_is_totally_safe_mut<'a, T: ?Sized>(x: &mut T) -> &'a mut T {
    let ptr = x as *mut T;
    unsafe { &mut *ptr }
}
|
2958
src/tui.rs
Normal file
2958
src/tui.rs
Normal file
File diff suppressed because it is too large
Load diff
705
src/ucd.rs
Normal file
705
src/ucd.rs
Normal file
|
@ -0,0 +1,705 @@
|
||||||
|
use crate::helpers::{CoordType, Point};
|
||||||
|
use crate::memchr::{memchr2, memrchr2};
|
||||||
|
use crate::ucd_gen::*;
|
||||||
|
use crate::utf8::Utf8Chars;
|
||||||
|
use std::cmp::Ordering;
|
||||||
|
|
||||||
|
/// A byte buffer that can be read in chunks relative to an offset.
pub trait Document {
    /// Returns a chunk of bytes that ends at `off`.
    fn read_backward(&self, off: usize) -> &[u8];

    /// Returns a chunk of bytes that starts at `off`.
    /// `MeasurementConfig` treats an empty chunk as the end of the document.
    fn read_forward(&self, off: usize) -> &[u8];
}
|
||||||
|
|
||||||
|
impl Document for &[u8] {
|
||||||
|
fn read_backward(&self, off: usize) -> &[u8] {
|
||||||
|
let s = *self;
|
||||||
|
&s[..off.min(s.len())]
|
||||||
|
}
|
||||||
|
|
||||||
|
fn read_forward(&self, off: usize) -> &[u8] {
|
||||||
|
let s = *self;
|
||||||
|
&s[off.min(s.len())..]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Copy, Default)]
|
||||||
|
pub struct UcdCursor {
|
||||||
|
/// Offset in bytes within the buffer.
|
||||||
|
pub offset: usize,
|
||||||
|
/// Position in the buffer in lines (.y) and grapheme clusters (.x).
|
||||||
|
/// Line wrapping has NO influence on this.
|
||||||
|
pub logical_pos: Point,
|
||||||
|
/// Position in the buffer in laid out rows (.y) and columns (.x).
|
||||||
|
/// Line wrapping has an influence on this.
|
||||||
|
pub visual_pos: Point,
|
||||||
|
/// Horizontal position in visual columns.
|
||||||
|
/// Line wrapping has NO influence on this and if word wrap is disabled,
|
||||||
|
/// it's identical to `visual_pos.x`. This is useful for calculating tab widths.
|
||||||
|
pub column: CoordType,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct WrapOpportunity {
|
||||||
|
absolute_offset: usize,
|
||||||
|
offset_next_cluster: usize,
|
||||||
|
props_next_cluster: usize,
|
||||||
|
logical_pos_x: CoordType,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct MeasurementConfig<'doc> {
|
||||||
|
buffer: &'doc dyn Document,
|
||||||
|
tab_size: CoordType,
|
||||||
|
word_wrap_column: CoordType,
|
||||||
|
cursor: UcdCursor,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'doc> MeasurementConfig<'doc> {
|
||||||
|
pub fn new(buffer: &'doc dyn Document) -> Self {
|
||||||
|
Self {
|
||||||
|
buffer,
|
||||||
|
tab_size: 8,
|
||||||
|
word_wrap_column: CoordType::MAX,
|
||||||
|
cursor: UcdCursor::default(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Sets the tab stop distance in columns.
///
/// Values below 1 are clamped to 1: tab widths are computed with
/// `column % tab_size`, which would panic for a tab size of 0.
pub fn with_tab_size(mut self, tab_size: CoordType) -> Self {
    self.tab_size = tab_size.max(1);
    self
}
|
||||||
|
|
||||||
|
pub fn with_word_wrap_column(mut self, word_wrap_column: CoordType) -> Self {
|
||||||
|
self.word_wrap_column = word_wrap_column;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn with_cursor(mut self, cursor: UcdCursor) -> Self {
|
||||||
|
self.cursor = cursor;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn goto_offset(&mut self, offset: usize) -> UcdCursor {
|
||||||
|
self.cursor = Self::measure_forward(
|
||||||
|
self.tab_size,
|
||||||
|
self.word_wrap_column,
|
||||||
|
offset,
|
||||||
|
Point::MAX,
|
||||||
|
Point::MAX,
|
||||||
|
self.cursor,
|
||||||
|
self.buffer,
|
||||||
|
);
|
||||||
|
self.cursor
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn goto_logical(&mut self, logical_target: Point) -> UcdCursor {
|
||||||
|
self.cursor = Self::measure_forward(
|
||||||
|
self.tab_size,
|
||||||
|
self.word_wrap_column,
|
||||||
|
usize::MAX,
|
||||||
|
logical_target,
|
||||||
|
Point::MAX,
|
||||||
|
self.cursor,
|
||||||
|
self.buffer,
|
||||||
|
);
|
||||||
|
self.cursor
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn goto_visual(&mut self, visual_target: Point) -> UcdCursor {
|
||||||
|
self.cursor = Self::measure_forward(
|
||||||
|
self.tab_size,
|
||||||
|
self.word_wrap_column,
|
||||||
|
usize::MAX,
|
||||||
|
Point::MAX,
|
||||||
|
visual_target,
|
||||||
|
self.cursor,
|
||||||
|
self.buffer,
|
||||||
|
);
|
||||||
|
self.cursor
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn cursor(&self) -> UcdCursor {
|
||||||
|
self.cursor
|
||||||
|
}
|
||||||
|
|
||||||
|
fn measure_forward(
|
||||||
|
tab_size: CoordType,
|
||||||
|
word_wrap_column: CoordType,
|
||||||
|
offset_target: usize,
|
||||||
|
logical_target: Point,
|
||||||
|
visual_target: Point,
|
||||||
|
cursor: UcdCursor,
|
||||||
|
buffer: &dyn Document,
|
||||||
|
) -> UcdCursor {
|
||||||
|
if cursor.logical_pos >= logical_target || cursor.visual_pos >= visual_target {
|
||||||
|
return cursor;
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut wrap: Option<WrapOpportunity> = None;
|
||||||
|
let mut hit: Option<UcdCursor> = None;
|
||||||
|
let mut absolute_offset = cursor.offset;
|
||||||
|
let mut logical_pos_x = cursor.logical_pos.x;
|
||||||
|
let mut logical_pos_y = cursor.logical_pos.y;
|
||||||
|
let mut visual_pos_x = cursor.visual_pos.x;
|
||||||
|
let mut visual_pos_y = cursor.visual_pos.y;
|
||||||
|
let mut column = cursor.column;
|
||||||
|
let (mut offset_target_x, mut logical_target_x, mut visual_target_x) = Self::recalc_target(
|
||||||
|
offset_target,
|
||||||
|
logical_target,
|
||||||
|
visual_target,
|
||||||
|
logical_pos_y,
|
||||||
|
visual_pos_y,
|
||||||
|
);
|
||||||
|
|
||||||
|
'outer: loop {
|
||||||
|
let chunk = buffer.read_forward(absolute_offset);
|
||||||
|
let chunk_beg = absolute_offset;
|
||||||
|
let chunk_end = absolute_offset + chunk.len();
|
||||||
|
let mut it = Utf8Chars::new(chunk, 0);
|
||||||
|
let Some(mut ch) = it.next() else {
|
||||||
|
break;
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut props_next_cluster = ucd_grapheme_cluster_lookup(ch);
|
||||||
|
|
||||||
|
loop {
|
||||||
|
if absolute_offset >= chunk_end {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if absolute_offset >= offset_target_x
|
||||||
|
|| logical_pos_x >= logical_target_x
|
||||||
|
|| visual_pos_x >= visual_target_x
|
||||||
|
{
|
||||||
|
if wrap.is_none() {
|
||||||
|
break 'outer;
|
||||||
|
}
|
||||||
|
|
||||||
|
hit = Some(UcdCursor {
|
||||||
|
offset: absolute_offset,
|
||||||
|
logical_pos: Point {
|
||||||
|
x: logical_pos_x,
|
||||||
|
y: logical_pos_y,
|
||||||
|
},
|
||||||
|
visual_pos: Point {
|
||||||
|
x: visual_pos_x,
|
||||||
|
y: visual_pos_y,
|
||||||
|
},
|
||||||
|
column,
|
||||||
|
});
|
||||||
|
// Prevent hits on the same line until we encounter a line wrap or explicit newline.
|
||||||
|
offset_target_x = usize::MAX;
|
||||||
|
logical_target_x = CoordType::MAX;
|
||||||
|
visual_target_x = CoordType::MAX;
|
||||||
|
}
|
||||||
|
|
||||||
|
let props_current_cluster = props_next_cluster;
|
||||||
|
let is_tab = ch == '\t';
|
||||||
|
let mut offset_next_cluster;
|
||||||
|
let mut width = 0;
|
||||||
|
let mut state = 0;
|
||||||
|
|
||||||
|
// Figure out the length and width of the rest of the grapheme cluster.
|
||||||
|
loop {
|
||||||
|
offset_next_cluster = it.offset();
|
||||||
|
width += ucd_grapheme_cluster_character_width(props_next_cluster) as CoordType;
|
||||||
|
|
||||||
|
let Some(ch_next) = it.next() else {
|
||||||
|
break;
|
||||||
|
};
|
||||||
|
|
||||||
|
ch = ch_next;
|
||||||
|
let props_trail = ucd_grapheme_cluster_lookup(ch);
|
||||||
|
state = ucd_grapheme_cluster_joins(state, props_next_cluster, props_trail);
|
||||||
|
props_next_cluster = props_trail;
|
||||||
|
|
||||||
|
if ucd_grapheme_cluster_joins_done(state) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let offset_next_cluster = chunk_beg + offset_next_cluster;
|
||||||
|
|
||||||
|
if is_tab {
|
||||||
|
// Tabs require special handling because they can have a variable width.
|
||||||
|
width = tab_size - (column % tab_size);
|
||||||
|
} else {
|
||||||
|
width = width.min(2);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Hard wrap: Both the logical and visual position advance by one line.
|
||||||
|
if ucd_grapheme_cluster_is_newline(props_current_cluster) {
|
||||||
|
// Don't cross the newline if the target is on this line.
|
||||||
|
// E.g. if the callers asks for column 100 on a 10 column line,
|
||||||
|
// we'll return with the cursor set to column 10.
|
||||||
|
if logical_pos_y >= logical_target.y || visual_pos_y >= visual_target.y {
|
||||||
|
break 'outer;
|
||||||
|
}
|
||||||
|
|
||||||
|
logical_pos_x = 0;
|
||||||
|
logical_pos_y += 1;
|
||||||
|
visual_pos_x = 0;
|
||||||
|
visual_pos_y += 1;
|
||||||
|
column = 0;
|
||||||
|
// We moved the logical/visual pos past the newline,
|
||||||
|
// so we also need to move the offset past it.
|
||||||
|
absolute_offset = offset_next_cluster;
|
||||||
|
(offset_target_x, logical_target_x, visual_target_x) = Self::recalc_target(
|
||||||
|
offset_target,
|
||||||
|
logical_target,
|
||||||
|
visual_target,
|
||||||
|
logical_pos_y,
|
||||||
|
visual_pos_y,
|
||||||
|
);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Line/word-wrap handling.
|
||||||
|
if word_wrap_column != CoordType::MAX && visual_pos_x + width > word_wrap_column {
|
||||||
|
// Reset to the last break opportunity, if there was any.
|
||||||
|
if let Some(ref w) = wrap {
|
||||||
|
absolute_offset = w.absolute_offset;
|
||||||
|
it.seek(w.offset_next_cluster);
|
||||||
|
props_next_cluster = w.props_next_cluster;
|
||||||
|
logical_pos_x = w.logical_pos_x;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wrap!
|
||||||
|
visual_pos_x = 0;
|
||||||
|
visual_pos_y += 1;
|
||||||
|
(offset_target_x, logical_target_x, visual_target_x) = Self::recalc_target(
|
||||||
|
offset_target,
|
||||||
|
logical_target,
|
||||||
|
visual_target,
|
||||||
|
logical_pos_y,
|
||||||
|
visual_pos_y,
|
||||||
|
);
|
||||||
|
wrap = None;
|
||||||
|
hit = None;
|
||||||
|
|
||||||
|
if absolute_offset < chunk_beg {
|
||||||
|
// We've had to reset to a point before this chunk,
|
||||||
|
// so we have to re-read the previous contents.
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Avoid advancing past the visual target, because `width` can be greater than 1.
|
||||||
|
if visual_pos_x + width > visual_target_x {
|
||||||
|
if word_wrap_column == CoordType::MAX || wrap.is_none() {
|
||||||
|
break 'outer;
|
||||||
|
}
|
||||||
|
|
||||||
|
hit = Some(UcdCursor {
|
||||||
|
offset: absolute_offset,
|
||||||
|
logical_pos: Point {
|
||||||
|
x: logical_pos_x,
|
||||||
|
y: logical_pos_y,
|
||||||
|
},
|
||||||
|
visual_pos: Point {
|
||||||
|
x: visual_pos_x,
|
||||||
|
y: visual_pos_y,
|
||||||
|
},
|
||||||
|
column,
|
||||||
|
});
|
||||||
|
// Prevent hits on the same line until we encounter a line wrap or explicit newline.
|
||||||
|
offset_target_x = usize::MAX;
|
||||||
|
logical_target_x = CoordType::MAX;
|
||||||
|
visual_target_x = CoordType::MAX;
|
||||||
|
}
|
||||||
|
|
||||||
|
absolute_offset = offset_next_cluster;
|
||||||
|
logical_pos_x += 1;
|
||||||
|
visual_pos_x += width;
|
||||||
|
column += width;
|
||||||
|
|
||||||
|
if word_wrap_column != CoordType::MAX
|
||||||
|
&& !ucd_line_break_joins(props_current_cluster, props_next_cluster)
|
||||||
|
{
|
||||||
|
if hit.is_some() {
|
||||||
|
break 'outer;
|
||||||
|
}
|
||||||
|
wrap = Some(WrapOpportunity {
|
||||||
|
absolute_offset,
|
||||||
|
offset_next_cluster: it.offset(),
|
||||||
|
props_next_cluster,
|
||||||
|
logical_pos_x,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if visual_pos_x >= word_wrap_column {
|
||||||
|
visual_pos_x = 0;
|
||||||
|
visual_pos_y += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(c) = hit {
|
||||||
|
return c;
|
||||||
|
}
|
||||||
|
|
||||||
|
UcdCursor {
|
||||||
|
offset: absolute_offset,
|
||||||
|
logical_pos: Point {
|
||||||
|
x: logical_pos_x,
|
||||||
|
y: logical_pos_y,
|
||||||
|
},
|
||||||
|
visual_pos: Point {
|
||||||
|
x: visual_pos_x,
|
||||||
|
y: visual_pos_y,
|
||||||
|
},
|
||||||
|
column,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn recalc_target(
|
||||||
|
offset_target: usize,
|
||||||
|
logical_target: Point,
|
||||||
|
visual_target: Point,
|
||||||
|
logical_pos_y: CoordType,
|
||||||
|
visual_pos_y: CoordType,
|
||||||
|
) -> (usize, CoordType, CoordType) {
|
||||||
|
(
|
||||||
|
offset_target,
|
||||||
|
Self::target_column(logical_target, logical_pos_y),
|
||||||
|
Self::target_column(visual_target, visual_pos_y),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn target_column(target: Point, y: CoordType) -> CoordType {
|
||||||
|
match y.cmp(&target.y) {
|
||||||
|
Ordering::Less => CoordType::MAX,
|
||||||
|
Ordering::Equal => target.x,
|
||||||
|
Ordering::Greater => 0,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The coarse character classes that word navigation distinguishes.
#[derive(Clone, Copy, PartialEq, Eq)]
enum CharClass {
    /// Space and tab.
    Whitespace,
    /// Carriage return and line feed.
    Newline,
    /// One of the ASCII separator characters handed to `construct_classifier`.
    Separator,
    /// Everything else, including all non-ASCII bytes.
    Word,
}

/// Builds a byte-indexed lookup table that maps every byte to its `CharClass`.
///
/// Every byte starts out as `Word`. Space/tab and CR/LF are then marked as
/// `Whitespace` and `Newline`, and finally each byte in `seperators` is marked
/// as `Separator` (which overrides the earlier assignments).
///
/// Panics (at compile time when used in a const context) if `seperators`
/// contains a non-ASCII byte.
const fn construct_classifier(seperators: &[u8]) -> [CharClass; 256] {
    let mut table = [CharClass::Word; 256];

    table[b' ' as usize] = CharClass::Whitespace;
    table[b'\t' as usize] = CharClass::Whitespace;
    table[b'\n' as usize] = CharClass::Newline;
    table[b'\r' as usize] = CharClass::Newline;

    // `for` loops aren't available in `const fn`, so peel the slice manually.
    let mut remaining = seperators;
    while let [first, tail @ ..] = remaining {
        let ch = *first;
        assert!(ch < 128, "Only ASCII separators are supported.");
        table[ch as usize] = CharClass::Separator;
        remaining = tail;
    }

    table
}

/// The classifier used by `word_forward` / `word_backward`.
const WORD_CLASSIFIER: [CharClass; 256] =
    construct_classifier(br#"`~!@#$%^&*()-=+[{]}\|;:'",.<>/?"#);
|
||||||
|
|
||||||
|
/// Finds the next word boundary given a document cursor offset.
|
||||||
|
/// Returns the offset of the next word boundary.
|
||||||
|
pub fn word_forward(doc: &dyn Document, offset: usize) -> usize {
|
||||||
|
word_navigation(WordForward {
|
||||||
|
doc,
|
||||||
|
offset,
|
||||||
|
chunk: &[],
|
||||||
|
chunk_off: 0,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The backward version of `word_forward`.
|
||||||
|
pub fn word_backward(doc: &dyn Document, offset: usize) -> usize {
|
||||||
|
word_navigation(WordBackward {
|
||||||
|
doc,
|
||||||
|
offset,
|
||||||
|
chunk: &[],
|
||||||
|
chunk_off: 0,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Word navigation implementation. Matches the behavior of VS Code.
|
||||||
|
fn word_navigation<T: WordNavigation>(mut nav: T) -> usize {
|
||||||
|
// First skip one newline, if any.
|
||||||
|
nav.skip_newline();
|
||||||
|
|
||||||
|
// Skip any whitespace.
|
||||||
|
nav.skip_class(CharClass::Whitespace);
|
||||||
|
|
||||||
|
// Skip one word or seperator and take note of the class.
|
||||||
|
let class = nav.peek(CharClass::Whitespace);
|
||||||
|
if matches!(class, CharClass::Separator | CharClass::Word) {
|
||||||
|
nav.next();
|
||||||
|
|
||||||
|
let off = nav.offset();
|
||||||
|
|
||||||
|
// Continue skipping the same class.
|
||||||
|
nav.skip_class(class);
|
||||||
|
|
||||||
|
// If the class was a separator and we only moved one character,
|
||||||
|
// continue skipping characters of the word class.
|
||||||
|
if off == nav.offset() && class == CharClass::Separator {
|
||||||
|
nav.skip_class(CharClass::Word);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
nav.offset()
|
||||||
|
}
|
||||||
|
|
||||||
|
trait WordNavigation {
|
||||||
|
fn skip_newline(&mut self);
|
||||||
|
fn skip_class(&mut self, class: CharClass);
|
||||||
|
fn peek(&self, default: CharClass) -> CharClass;
|
||||||
|
fn next(&mut self);
|
||||||
|
fn offset(&self) -> usize;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct WordForward<'a> {
|
||||||
|
doc: &'a dyn Document,
|
||||||
|
offset: usize,
|
||||||
|
chunk: &'a [u8],
|
||||||
|
chunk_off: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl WordNavigation for WordForward<'_> {
|
||||||
|
fn skip_newline(&mut self) {
|
||||||
|
// We can rely on the fact that the document does not split graphemes across chunks.
|
||||||
|
// = If there's a newline it's wholly contained in this chunk.
|
||||||
|
if self.chunk_off < self.chunk.len() && self.chunk[self.chunk_off] == b'\r' {
|
||||||
|
self.chunk_off += 1;
|
||||||
|
}
|
||||||
|
if self.chunk_off < self.chunk.len() && self.chunk[self.chunk_off] == b'\n' {
|
||||||
|
self.chunk_off += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn skip_class(&mut self, class: CharClass) {
|
||||||
|
'outer: loop {
|
||||||
|
while self.chunk_off < self.chunk.len() {
|
||||||
|
if WORD_CLASSIFIER[self.chunk[self.chunk_off] as usize] != class {
|
||||||
|
break 'outer;
|
||||||
|
}
|
||||||
|
self.chunk_off += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
self.offset += self.chunk.len();
|
||||||
|
self.chunk = self.doc.read_forward(self.offset);
|
||||||
|
self.chunk_off = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn peek(&self, default: CharClass) -> CharClass {
|
||||||
|
if self.chunk_off < self.chunk.len() {
|
||||||
|
WORD_CLASSIFIER[self.chunk[self.chunk_off] as usize]
|
||||||
|
} else {
|
||||||
|
default
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn next(&mut self) {
|
||||||
|
self.chunk_off += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
fn offset(&self) -> usize {
|
||||||
|
self.offset + self.chunk_off
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct WordBackward<'a> {
|
||||||
|
doc: &'a dyn Document,
|
||||||
|
offset: usize,
|
||||||
|
chunk: &'a [u8],
|
||||||
|
chunk_off: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl WordNavigation for WordBackward<'_> {
|
||||||
|
fn skip_newline(&mut self) {
|
||||||
|
// We can rely on the fact that the document does not split graphemes across chunks.
|
||||||
|
// = If there's a newline it's wholly contained in this chunk.
|
||||||
|
if self.chunk_off > 0 && self.chunk[self.chunk_off - 1] == b'\r' {
|
||||||
|
self.chunk_off -= 1;
|
||||||
|
}
|
||||||
|
if self.chunk_off > 0 && self.chunk[self.chunk_off - 1] == b'\n' {
|
||||||
|
self.chunk_off -= 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn skip_class(&mut self, class: CharClass) {
|
||||||
|
'outer: loop {
|
||||||
|
while self.chunk_off > 0 {
|
||||||
|
if WORD_CLASSIFIER[self.chunk[self.chunk_off - 1] as usize] != class {
|
||||||
|
break 'outer;
|
||||||
|
}
|
||||||
|
self.chunk_off -= 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
self.offset -= self.chunk.len();
|
||||||
|
self.chunk = self.doc.read_backward(self.offset);
|
||||||
|
self.chunk_off = self.chunk.len();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn peek(&self, default: CharClass) -> CharClass {
|
||||||
|
if self.chunk_off > 0 {
|
||||||
|
WORD_CLASSIFIER[self.chunk[self.chunk_off - 1] as usize]
|
||||||
|
} else {
|
||||||
|
default
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn next(&mut self) {
|
||||||
|
self.chunk_off -= 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
fn offset(&self) -> usize {
|
||||||
|
self.offset - self.chunk.len() + self.chunk_off
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn newlines_forward(
|
||||||
|
text: &[u8],
|
||||||
|
mut offset: usize,
|
||||||
|
mut line: CoordType,
|
||||||
|
line_stop: CoordType,
|
||||||
|
) -> (usize, CoordType) {
|
||||||
|
// Leaving the cursor at the beginning of the current line when the limit
|
||||||
|
// is 0 makes this function behave identical to ucd_newlines_backward.
|
||||||
|
if line >= line_stop {
|
||||||
|
return newlines_backward(text, offset, line, line_stop);
|
||||||
|
}
|
||||||
|
|
||||||
|
let len = text.len();
|
||||||
|
offset = offset.min(len);
|
||||||
|
|
||||||
|
loop {
|
||||||
|
offset = memchr2(b'\r', b'\n', text, offset);
|
||||||
|
if offset >= len {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
let ch = text[offset];
|
||||||
|
offset += 1;
|
||||||
|
if ch == b'\r' && offset != len && text[offset] == b'\n' {
|
||||||
|
offset += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
line += 1;
|
||||||
|
if line >= line_stop {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
(offset, line)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Seeks to the start of the given line.
|
||||||
|
// No matter what parameters are given, it only returns an offset at the start of a line.
|
||||||
|
// Put differently, even if `line == line_stop`, it'll seek backward to the line start.
|
||||||
|
pub fn newlines_backward(
|
||||||
|
text: &[u8],
|
||||||
|
mut offset: usize,
|
||||||
|
mut line: CoordType,
|
||||||
|
line_stop: CoordType,
|
||||||
|
) -> (usize, CoordType) {
|
||||||
|
offset = offset.min(text.len());
|
||||||
|
|
||||||
|
loop {
|
||||||
|
offset = match memrchr2(b'\r', b'\n', text, offset) {
|
||||||
|
Some(i) => i,
|
||||||
|
None => return (0, line),
|
||||||
|
};
|
||||||
|
if line <= line_stop {
|
||||||
|
// +1: Past the newline, at the start of the current line.
|
||||||
|
return (offset + 1, line);
|
||||||
|
}
|
||||||
|
|
||||||
|
if text[offset] == b'\n' && offset != 0 && text[offset - 1] == b'\r' {
|
||||||
|
offset -= 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
line -= 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Removes one trailing newline from `text`.
///
/// At most one trailing LF is removed, followed by at most one trailing CR.
/// This strips "\n", "\r\n" and a lone "\r".
pub fn strip_newline(text: &[u8]) -> &[u8] {
    let text = text.strip_suffix(b"\n").unwrap_or(text);
    text.strip_suffix(b"\r").unwrap_or(text)
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod test {
    use super::*;

    /// Asserts the offset, logical position and visual position of a cursor.
    macro_rules! assert_cursor {
        ($cursor:expr, $offset:expr, $logical:expr, $visual:expr) => {{
            let cursor = $cursor;
            assert_eq!(cursor.offset, $offset);
            assert_eq!(cursor.logical_pos, $logical);
            assert_eq!(cursor.visual_pos, $visual);
        }};
    }

    /// Going to the first column of the second line must land right after the newline.
    #[test]
    fn test_measure_forward_newline_start() {
        let text = "foo\nbar".as_bytes();
        assert_cursor!(
            MeasurementConfig::new(&text).goto_visual(Point { x: 0, y: 1 }),
            4,
            Point { x: 0, y: 1 },
            Point { x: 0, y: 1 }
        );
    }

    /// A target in the middle of a wide grapheme must stop in front of it.
    #[test]
    fn test_measure_forward_clipped_wide_char() {
        let text = "a😶🌫️b".as_bytes();
        assert_cursor!(
            MeasurementConfig::new(&text).goto_visual(Point { x: 2, y: 0 }),
            1,
            Point { x: 1, y: 0 },
            Point { x: 1, y: 0 }
        );
    }

    #[test]
    fn test_measure_forward_word_wrap() {
        //   |foo␣  |
        //   |bar␣  |
        //   |baz   |
        let text = "foo bar \nbaz".as_bytes();

        // A logical target on the first logical line that ends up on the wrapped visual line.
        assert_cursor!(
            MeasurementConfig::new(&text)
                .with_word_wrap_column(6)
                .goto_logical(Point { x: 5, y: 0 }),
            5,
            Point { x: 5, y: 0 },
            Point { x: 1, y: 1 }
        );

        // The remaining checks reuse a single config and run in order.
        let mut cfg = MeasurementConfig::new(&text).with_word_wrap_column(6);

        assert_cursor!(
            cfg.goto_visual(Point { x: 5, y: 0 }),
            4,
            Point { x: 4, y: 0 },
            Point { x: 0, y: 1 }
        );

        assert_cursor!(
            cfg.goto_visual(Point { x: 0, y: 1 }),
            4,
            Point { x: 4, y: 0 },
            Point { x: 0, y: 1 }
        );

        assert_cursor!(
            cfg.goto_visual(Point { x: 100, y: 1 }),
            8,
            Point { x: 8, y: 0 },
            Point { x: 4, y: 1 }
        );

        assert_cursor!(
            cfg.goto_visual(Point { x: 0, y: 2 }),
            9,
            Point { x: 0, y: 1 },
            Point { x: 0, y: 2 }
        );

        assert_cursor!(
            cfg.goto_visual(Point { x: 100, y: 2 }),
            12,
            Point { x: 3, y: 1 },
            Point { x: 3, y: 2 }
        );
    }
}
|
src/ucd_gen.rs (normal file, 1066 lines)
File diff suppressed because it is too large.
src/utf8.rs (normal file, 217 lines)
@@ -0,0 +1,217 @@
|
||||||
|
use crate::helpers;
|
||||||
|
use std::{hint, iter, mem};
|
||||||
|
|
||||||
|
#[derive(Clone, Copy)]
|
||||||
|
pub struct Utf8Chars<'a> {
|
||||||
|
source: &'a [u8],
|
||||||
|
offset: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> Utf8Chars<'a> {
|
||||||
|
pub fn new(source: &'a [u8], offset: usize) -> Self {
|
||||||
|
Self { source, offset }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn offset(&self) -> usize {
|
||||||
|
self.offset
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn seek(&mut self, offset: usize) {
|
||||||
|
self.offset = offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn fffd() -> Option<char> {
|
||||||
|
// Improves performance by ~5% and reduces code size.
|
||||||
|
helpers::cold_path();
|
||||||
|
Some('\u{FFFD}')
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Iterator for Utf8Chars<'_> {
|
||||||
|
type Item = char;
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
if self.offset >= self.source.len() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
let c = self.source[self.offset];
|
||||||
|
self.offset += 1;
|
||||||
|
|
||||||
|
// See: https://datatracker.ietf.org/doc/html/rfc3629
|
||||||
|
// as well as ICU's `utf8.h` for the bitmask approach.
|
||||||
|
|
||||||
|
// UTF8-1 = %x00-7F
|
||||||
|
if (c & 0x80) == 0 {
|
||||||
|
return Some(c as char);
|
||||||
|
}
|
||||||
|
|
||||||
|
if self.offset >= self.source.len() {
|
||||||
|
return Self::fffd();
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut cp = c as u32;
|
||||||
|
|
||||||
|
if cp < 0xE0 {
|
||||||
|
// UTF8-2 = %xC2-DF UTF8-tail
|
||||||
|
|
||||||
|
if cp < 0xC2 {
|
||||||
|
return Self::fffd();
|
||||||
|
}
|
||||||
|
|
||||||
|
// The lead byte is 110xxxxx
|
||||||
|
// -> Strip off the 110 prefix
|
||||||
|
cp &= !0xE0;
|
||||||
|
} else if cp < 0xF0 {
|
||||||
|
// UTF8-3 =
|
||||||
|
// %xE0 %xA0-BF UTF8-tail
|
||||||
|
// %xE1-EC UTF8-tail UTF8-tail
|
||||||
|
// %xED %x80-9F UTF8-tail
|
||||||
|
// %xEE-EF UTF8-tail UTF8-tail
|
||||||
|
|
||||||
|
// This is a pretty neat approach seen in ICU4C, because it's a 1:1 translation of the RFC.
|
||||||
|
// I don't understand why others don't do the same thing. It's rather performant.
|
||||||
|
const BITS_80_9F: u8 = 1 << 0b100; // 0x80-9F, aka 0b100xxxxx
|
||||||
|
const BITS_A0_BF: u8 = 1 << 0b101; // 0xA0-BF, aka 0b101xxxxx
|
||||||
|
const BITS_BOTH: u8 = BITS_80_9F | BITS_A0_BF;
|
||||||
|
const LEAD_TRAIL1_BITS: [u8; 16] = [
|
||||||
|
// v-- lead byte
|
||||||
|
BITS_A0_BF, // 0xE0
|
||||||
|
BITS_BOTH, // 0xE1
|
||||||
|
BITS_BOTH, // 0xE2
|
||||||
|
BITS_BOTH, // 0xE3
|
||||||
|
BITS_BOTH, // 0xE4
|
||||||
|
BITS_BOTH, // 0xE5
|
||||||
|
BITS_BOTH, // 0xE6
|
||||||
|
BITS_BOTH, // 0xE7
|
||||||
|
BITS_BOTH, // 0xE8
|
||||||
|
BITS_BOTH, // 0xE9
|
||||||
|
BITS_BOTH, // 0xEA
|
||||||
|
BITS_BOTH, // 0xEB
|
||||||
|
BITS_BOTH, // 0xEC
|
||||||
|
BITS_80_9F, // 0xED
|
||||||
|
BITS_BOTH, // 0xEE
|
||||||
|
BITS_BOTH, // 0xEF
|
||||||
|
];
|
||||||
|
|
||||||
|
// The lead byte is 1110xxxx
|
||||||
|
// -> Strip off the 1110 prefix
|
||||||
|
cp &= !0xF0;
|
||||||
|
|
||||||
|
let t = self.source[self.offset];
|
||||||
|
if LEAD_TRAIL1_BITS[cp as usize] & (1 << (t >> 5)) == 0 {
|
||||||
|
return Self::fffd();
|
||||||
|
}
|
||||||
|
cp = (cp << 6) | (t as u32 & 0x3F);
|
||||||
|
|
||||||
|
self.offset += 1;
|
||||||
|
if self.offset >= self.source.len() {
|
||||||
|
return Self::fffd();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// UTF8-4 =
|
||||||
|
// %xF0 %x90-BF UTF8-tail UTF8-tail
|
||||||
|
// %xF1-F3 UTF8-tail UTF8-tail UTF8-tail
|
||||||
|
// %xF4 %x80-8F UTF8-tail UTF8-tail
|
||||||
|
|
||||||
|
// This is similar to the above, but with the indices flipped:
|
||||||
|
// The trail byte is the index and the lead byte mask is the value.
|
||||||
|
// This is because the split at 0x90 requires more bits than fit into an u8.
|
||||||
|
const TRAIL1_LEAD_BITS: [u8; 16] = [
|
||||||
|
// +------ 0xF4 lead
|
||||||
|
// |+----- 0xF3 lead
|
||||||
|
// ||+---- 0xF2 lead
|
||||||
|
// |||+--- 0xF1 lead
|
||||||
|
// ||||+-- 0xF0 lead
|
||||||
|
// vvvvv
|
||||||
|
0b_00000, //
|
||||||
|
0b_00000, //
|
||||||
|
0b_00000, //
|
||||||
|
0b_00000, //
|
||||||
|
0b_00000, //
|
||||||
|
0b_00000, //
|
||||||
|
0b_00000, // trail bytes:
|
||||||
|
0b_00000, //
|
||||||
|
0b_11110, // 0x80-8F -> 0x80-8F can be preceded by 0xF1-F4
|
||||||
|
0b_01111, // 0x90-9F -v
|
||||||
|
0b_01111, // 0xA0-AF -> 0x90-BF can be preceded by 0xF0-F3
|
||||||
|
0b_01111, // 0xB0-BF -^
|
||||||
|
0b_00000, //
|
||||||
|
0b_00000, //
|
||||||
|
0b_00000, //
|
||||||
|
0b_00000, //
|
||||||
|
];
|
||||||
|
|
||||||
|
// The lead byte *may* be 11110xxx, but could also be e.g. 11111xxx.
|
||||||
|
// -> Only strip off the 1111 prefix
|
||||||
|
cp &= !0xF0;
|
||||||
|
|
||||||
|
// Now we can verify if it's actually <= 0xF4.
|
||||||
|
if cp > 4 {
|
||||||
|
return Self::fffd();
|
||||||
|
}
|
||||||
|
|
||||||
|
let t = self.source[self.offset];
|
||||||
|
if TRAIL1_LEAD_BITS[(t >> 4) as usize] & (1 << cp) == 0 {
|
||||||
|
return Self::fffd();
|
||||||
|
}
|
||||||
|
cp = (cp << 6) | (t as u32 & 0x3F);
|
||||||
|
|
||||||
|
self.offset += 1;
|
||||||
|
if self.offset >= self.source.len() {
|
||||||
|
return Self::fffd();
|
||||||
|
}
|
||||||
|
|
||||||
|
// UTF8-tail = %x80-BF
|
||||||
|
let t = self.source[self.offset] as u32 - 0x80;
|
||||||
|
if t > 0x3F {
|
||||||
|
return Self::fffd();
|
||||||
|
}
|
||||||
|
cp = (cp << 6) | t;
|
||||||
|
|
||||||
|
self.offset += 1;
|
||||||
|
if self.offset >= self.source.len() {
|
||||||
|
return Self::fffd();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
unsafe { hint::assert_unchecked(self.offset < self.source.len()) };
|
||||||
|
|
||||||
|
// UTF8-tail = %x80-BF
|
||||||
|
let t = self.source[self.offset] as u32 - 0x80;
|
||||||
|
if t > 0x3F {
|
||||||
|
return Self::fffd();
|
||||||
|
}
|
||||||
|
cp = (cp << 6) | t;
|
||||||
|
|
||||||
|
self.offset += 1;
|
||||||
|
Some(unsafe { mem::transmute(cp) })
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl iter::FusedIterator for Utf8Chars<'_> {}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Cross-checks `Utf8Chars` against the standard library's `utf8_chunks()`
    /// on an input that contains an encoded surrogate (invalid in UTF-8).
    #[test]
    fn test_broken_utf8() {
        let source = [b'a', 0xED, 0xA0, 0x80, b'b'];
        let mut chars = Utf8Chars::new(&source, 0);
        let mut expected_offset = 0;

        for chunk in source.utf8_chunks() {
            // Valid text must be yielded char by char...
            for expected in chunk.valid().chars() {
                expected_offset += expected.len_utf8();
                assert_eq!(chars.next(), Some(expected));
                assert_eq!(chars.offset(), expected_offset);
            }

            // ...and each invalid sequence as exactly one U+FFFD.
            let invalid_len = chunk.invalid().len();
            if invalid_len != 0 {
                expected_offset += invalid_len;
                assert_eq!(chars.next(), Some('\u{FFFD}'));
                assert_eq!(chars.offset(), expected_offset);
            }
        }
    }
}
|
src/vt.rs (normal file, 319 lines)
@@ -0,0 +1,319 @@
|
||||||
|
use core::time;
|
||||||
|
|
||||||
|
use crate::memchr::memchr2;
|
||||||
|
|
||||||
|
pub enum Token<'parser, 'input> {
|
||||||
|
Text(&'input str),
|
||||||
|
Ctrl(char),
|
||||||
|
Esc(char),
|
||||||
|
SS3(char),
|
||||||
|
Csi(&'parser Csi),
|
||||||
|
Osc { data: &'input str, partial: bool },
|
||||||
|
Dcs { data: &'input str, partial: bool },
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The state of the parser between two bytes of input.
#[derive(Clone, Copy)]
pub enum State {
    /// Not inside of any sequence.
    Ground,
    /// Saw an ESC.
    Esc,
    /// Saw ESC O.
    Ss3,
    /// Saw ESC [ and is now reading parameters.
    Csi,
    /// Saw ESC ] and is now reading the payload.
    Osc,
    /// Saw ESC P and is now reading the payload.
    Dcs,
    /// Inside an OSC payload, and the previous input ended in an ESC.
    OscEsc,
    /// Inside a DCS payload, and the previous input ended in an ESC.
    DcsEsc,
}
|
||||||
|
|
||||||
|
/// The contents of a parsed CSI sequence.
pub struct Csi {
    /// The numeric parameters. Each value is clamped to 0xffff.
    pub params: [i32; 32],
    /// The number of parameters in `params` that were part of the sequence.
    pub param_count: usize,
    /// The private marker (one of `<`, `=`, `>`, `?`), or `'\0'` if there was none.
    pub private_byte: char,
    /// The byte that terminated the sequence (0x40 to 0x7e).
    pub final_byte: char,
}
|
||||||
|
|
||||||
|
pub struct Parser {
|
||||||
|
state: State,
|
||||||
|
// Csi is not part of State, because it allows us
|
||||||
|
// to more quickly erase and reuse the struct.
|
||||||
|
csi: Csi,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Parser {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self {
|
||||||
|
state: State::Ground,
|
||||||
|
csi: Csi {
|
||||||
|
params: [0; 32],
|
||||||
|
param_count: 0,
|
||||||
|
private_byte: '\0',
|
||||||
|
final_byte: '\0',
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Suggests a timeout for the next call to `read()`.
|
||||||
|
///
|
||||||
|
/// We need this because of the ambiguouity of whether a trailing
|
||||||
|
/// escape character in an input is starting another escape sequence or
|
||||||
|
/// is just the result of the user literally pressing the Escape key.
|
||||||
|
pub fn read_timeout(&mut self) -> Option<std::time::Duration> {
|
||||||
|
match self.state {
|
||||||
|
// 100ms is a upper ceiling for a responsive feel. This uses half that,
|
||||||
|
// under the assumption that a really slow terminal needs equal amounts
|
||||||
|
// of time for I and O. Realistically though, this could be much lower.
|
||||||
|
State::Esc => Some(time::Duration::from_millis(50)),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parses the given input into VT sequences.
|
||||||
|
///
|
||||||
|
/// You should call this function even if your `read()`
|
||||||
|
/// had a timeout (pass an empty string in that case).
|
||||||
|
pub fn parse<'parser, 'input>(
|
||||||
|
&'parser mut self,
|
||||||
|
input: &'input str,
|
||||||
|
) -> Stream<'parser, 'input> {
|
||||||
|
Stream {
|
||||||
|
parser: self,
|
||||||
|
input,
|
||||||
|
off: 0,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct Stream<'parser, 'input> {
|
||||||
|
parser: &'parser mut Parser,
|
||||||
|
input: &'input str,
|
||||||
|
off: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Stream<'_, '_> {
|
||||||
|
/// Reads and consumes raw bytes from the input.
|
||||||
|
pub fn read(&mut self, dst: &mut [u8]) -> usize {
|
||||||
|
let bytes = self.input.as_bytes();
|
||||||
|
let off = self.off.min(bytes.len());
|
||||||
|
let len = dst.len().min(bytes.len() - off);
|
||||||
|
dst[..len].copy_from_slice(&bytes[off..off + len]);
|
||||||
|
self.off += len;
|
||||||
|
len
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parses the next VT sequence from the previously given input.
|
||||||
|
///
|
||||||
|
/// Can't implement Iterator, because this is a "lending iterator".
|
||||||
|
pub fn next(&mut self) -> Option<Token> {
|
||||||
|
let parser = &mut *self.parser;
|
||||||
|
let input = self.input;
|
||||||
|
let bytes = input.as_bytes();
|
||||||
|
|
||||||
|
// If the previous input ended with an escape character, `read_timeout()`
|
||||||
|
// returned `Some(..)` timeout, and if the caller did everything correctly
|
||||||
|
// and there was indeed a timeout, we should be called with an empty
|
||||||
|
// input. In that case we'll return the escape as its own token.
|
||||||
|
if input.is_empty() && matches!(parser.state, State::Esc) {
|
||||||
|
parser.state = State::Ground;
|
||||||
|
return Some(Token::Esc('\0'));
|
||||||
|
}
|
||||||
|
|
||||||
|
while self.off < bytes.len() {
|
||||||
|
match parser.state {
|
||||||
|
State::Ground => match bytes[self.off] {
|
||||||
|
0x1b => {
|
||||||
|
parser.state = State::Esc;
|
||||||
|
self.off += 1;
|
||||||
|
}
|
||||||
|
c @ (0x00..0x20 | 0x7f) => {
|
||||||
|
self.off += 1;
|
||||||
|
return Some(Token::Ctrl(c as char));
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
let beg = self.off;
|
||||||
|
while {
|
||||||
|
self.off += 1;
|
||||||
|
self.off < bytes.len()
|
||||||
|
&& bytes[self.off] >= 0x20
|
||||||
|
&& bytes[self.off] != 0x7f
|
||||||
|
} {}
|
||||||
|
return Some(Token::Text(&input[beg..self.off]));
|
||||||
|
}
|
||||||
|
},
|
||||||
|
State::Esc => {
|
||||||
|
let c = bytes[self.off];
|
||||||
|
self.off += 1;
|
||||||
|
match c {
|
||||||
|
b'[' => {
|
||||||
|
parser.state = State::Csi;
|
||||||
|
parser.csi.private_byte = '\0';
|
||||||
|
parser.csi.final_byte = '\0';
|
||||||
|
while parser.csi.param_count > 0 {
|
||||||
|
parser.csi.param_count -= 1;
|
||||||
|
parser.csi.params[parser.csi.param_count] = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
b']' => {
|
||||||
|
parser.state = State::Osc;
|
||||||
|
}
|
||||||
|
b'O' => {
|
||||||
|
parser.state = State::Ss3;
|
||||||
|
}
|
||||||
|
b'P' => {
|
||||||
|
parser.state = State::Dcs;
|
||||||
|
}
|
||||||
|
c => {
|
||||||
|
parser.state = State::Ground;
|
||||||
|
return Some(Token::Esc(c as char));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
State::Ss3 => {
|
||||||
|
parser.state = State::Ground;
|
||||||
|
let c = bytes[self.off];
|
||||||
|
self.off += 1;
|
||||||
|
return Some(Token::SS3(c as char));
|
||||||
|
}
|
||||||
|
State::Csi => {
|
||||||
|
loop {
|
||||||
|
// If we still have slots left, parse the parameter.
|
||||||
|
if parser.csi.param_count < parser.csi.params.len() {
|
||||||
|
let dst = &mut parser.csi.params[parser.csi.param_count];
|
||||||
|
while self.off < bytes.len()
|
||||||
|
&& bytes[self.off] >= b'0'
|
||||||
|
&& bytes[self.off] <= b'9'
|
||||||
|
{
|
||||||
|
let v = *dst * 10 + bytes[self.off] as i32 - b'0' as i32;
|
||||||
|
*dst = v.min(0xffff);
|
||||||
|
self.off += 1;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// ...otherwise, skip the parameters until we find the final byte.
|
||||||
|
while self.off < bytes.len()
|
||||||
|
&& bytes[self.off] >= b'0'
|
||||||
|
&& bytes[self.off] <= b'9'
|
||||||
|
{
|
||||||
|
self.off += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Encountered the end of the input before finding the final byte.
|
||||||
|
if self.off >= bytes.len() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
let c = bytes[self.off];
|
||||||
|
self.off += 1;
|
||||||
|
|
||||||
|
match c {
|
||||||
|
0x40..=0x7e => {
|
||||||
|
parser.state = State::Ground;
|
||||||
|
parser.csi.final_byte = c as char;
|
||||||
|
if parser.csi.param_count != 0 || parser.csi.params[0] != 0 {
|
||||||
|
parser.csi.param_count += 1;
|
||||||
|
}
|
||||||
|
return Some(Token::Csi(&parser.csi));
|
||||||
|
}
|
||||||
|
b';' => parser.csi.param_count += 1,
|
||||||
|
b'<'..=b'?' => parser.csi.private_byte = c as char,
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
State::Osc | State::Dcs => {
|
||||||
|
let beg = self.off;
|
||||||
|
let mut data;
|
||||||
|
let mut partial;
|
||||||
|
|
||||||
|
loop {
|
||||||
|
// Find any indication for the end of the OSC/DCS sequence.
|
||||||
|
self.off = memchr2(b'\x07', b'\x1b', bytes, self.off);
|
||||||
|
|
||||||
|
data = &input[beg..self.off];
|
||||||
|
partial = self.off >= bytes.len();
|
||||||
|
|
||||||
|
// Encountered the end of the input before finding the terminator.
|
||||||
|
if partial {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
let c = bytes[self.off];
|
||||||
|
self.off += 1;
|
||||||
|
|
||||||
|
if c == 0x1b {
|
||||||
|
// It's only a string terminator if it's followed by \.
|
||||||
|
// We're at the end so we're saving the state and will continue next time.
|
||||||
|
if self.off >= bytes.len() {
|
||||||
|
parser.state = match parser.state {
|
||||||
|
State::Osc => State::OscEsc,
|
||||||
|
_ => State::DcsEsc,
|
||||||
|
};
|
||||||
|
partial = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// False alarm: Not a string terminator.
|
||||||
|
if bytes[self.off] != b'\\' {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
self.off += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
let state = parser.state;
|
||||||
|
if !partial {
|
||||||
|
parser.state = State::Ground;
|
||||||
|
}
|
||||||
|
return match state {
|
||||||
|
State::Osc => Some(Token::Osc { data, partial }),
|
||||||
|
_ => Some(Token::Dcs { data, partial }),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
State::OscEsc | State::DcsEsc => {
|
||||||
|
// We were processing an OSC/DCS sequence and the last byte was an escape character.
|
||||||
|
// It's only a string terminator if it's followed by \ (= "\x1b\\").
|
||||||
|
if bytes[self.off] == b'\\' {
|
||||||
|
// It was indeed a string terminator and we can now tell the caller about it.
|
||||||
|
let state = parser.state;
|
||||||
|
|
||||||
|
// Consume the terminator (one byte in the previous input and this byte).
|
||||||
|
parser.state = State::Ground;
|
||||||
|
self.off += 1;
|
||||||
|
|
||||||
|
return match state {
|
||||||
|
State::OscEsc => Some(Token::Osc {
|
||||||
|
data: "",
|
||||||
|
partial: false,
|
||||||
|
}),
|
||||||
|
_ => Some(Token::Dcs {
|
||||||
|
data: "",
|
||||||
|
partial: false,
|
||||||
|
}),
|
||||||
|
};
|
||||||
|
} else {
|
||||||
|
// False alarm: Not a string terminator.
|
||||||
|
// We'll return the escape character as a separate token.
|
||||||
|
// Processing will continue from the current state (`bytes[self.off]`).
|
||||||
|
parser.state = match parser.state {
|
||||||
|
State::OscEsc => State::Osc,
|
||||||
|
_ => State::Dcs,
|
||||||
|
};
|
||||||
|
return match parser.state {
|
||||||
|
State::Osc => Some(Token::Osc {
|
||||||
|
data: "\x1b",
|
||||||
|
partial: true,
|
||||||
|
}),
|
||||||
|
_ => Some(Token::Dcs {
|
||||||
|
data: "\x1b",
|
||||||
|
partial: true,
|
||||||
|
}),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
tools/build_release_windows.bat (normal file, 11 lines)
@@ -0,0 +1,11 @@
|
||||||
|
@echo off

rem vcruntime140.dll is an optional Windows component, so we must not depend on it.
rem To get there, link everything statically, and then explicitly
rem link with ucrtbase.dll dynamically.
set RUSTFLAGS=-Ctarget-feature=+crt-static -Clink-args=/DEFAULTLIB:ucrt.lib -Clink-args=/NODEFAULTLIB:vcruntime.lib -Clink-args=/NODEFAULTLIB:msvcrt.lib -Clink-args=/NODEFAULTLIB:libucrt.lib

rem Rust's backtrace code for panics is almost as large as the entire editor.
rem Removing all of it yields a huge reduction in binary size.
rem The invocation that does so is currently disabled:
rem cargo build --release -Zbuild-std=std,panic_abort -Zbuild-std-features=panic_immediate_abort %*
cargo build --release %*
|
tools/grapheme-table-gen/Cargo.lock (generated, normal file, 380 lines)
@@ -0,0 +1,380 @@
|
||||||
|
# This file is automatically @generated by Cargo.
|
||||||
|
# It is not intended for manual editing.
|
||||||
|
version = 4
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "android-tzdata"
|
||||||
|
version = "0.1.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "android_system_properties"
|
||||||
|
version = "0.1.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "anyhow"
|
||||||
|
version = "1.0.95"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "34ac096ce696dc2fcabef30516bb13c0a68a11d30131d3df6f04711467681b04"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "autocfg"
|
||||||
|
version = "1.4.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "bumpalo"
|
||||||
|
version = "3.16.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "cc"
|
||||||
|
version = "1.2.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c31a0499c1dc64f458ad13872de75c0eb7e3fdb0e67964610c914b034fc5956e"
|
||||||
|
dependencies = [
|
||||||
|
"shlex",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "cfg-if"
|
||||||
|
version = "1.0.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "chrono"
|
||||||
|
version = "0.4.39"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7e36cc9d416881d2e24f9a963be5fb1cd90966419ac844274161d10488b3e825"
|
||||||
|
dependencies = [
|
||||||
|
"android-tzdata",
|
||||||
|
"iana-time-zone",
|
||||||
|
"js-sys",
|
||||||
|
"num-traits",
|
||||||
|
"wasm-bindgen",
|
||||||
|
"windows-targets",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "core-foundation-sys"
|
||||||
|
version = "0.8.7"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "crossbeam-deque"
|
||||||
|
version = "0.8.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
|
||||||
|
dependencies = [
|
||||||
|
"crossbeam-epoch",
|
||||||
|
"crossbeam-utils",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "crossbeam-epoch"
|
||||||
|
version = "0.9.18"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
|
||||||
|
dependencies = [
|
||||||
|
"crossbeam-utils",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "crossbeam-utils"
|
||||||
|
version = "0.8.21"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "either"
|
||||||
|
version = "1.13.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "grapheme-table-gen"
|
||||||
|
version = "0.1.0"
|
||||||
|
dependencies = [
|
||||||
|
"anyhow",
|
||||||
|
"chrono",
|
||||||
|
"indoc",
|
||||||
|
"pico-args",
|
||||||
|
"rayon",
|
||||||
|
"roxmltree",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "iana-time-zone"
|
||||||
|
version = "0.1.61"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "235e081f3925a06703c2d0117ea8b91f042756fd6e7a6e5d901e8ca1a996b220"
|
||||||
|
dependencies = [
|
||||||
|
"android_system_properties",
|
||||||
|
"core-foundation-sys",
|
||||||
|
"iana-time-zone-haiku",
|
||||||
|
"js-sys",
|
||||||
|
"wasm-bindgen",
|
||||||
|
"windows-core",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "iana-time-zone-haiku"
|
||||||
|
version = "0.1.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f"
|
||||||
|
dependencies = [
|
||||||
|
"cc",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "indoc"
|
||||||
|
version = "2.0.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "js-sys"
|
||||||
|
version = "0.3.76"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "6717b6b5b077764fb5966237269cb3c64edddde4b14ce42647430a78ced9e7b7"
|
||||||
|
dependencies = [
|
||||||
|
"once_cell",
|
||||||
|
"wasm-bindgen",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "libc"
|
||||||
|
version = "0.2.169"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "log"
|
||||||
|
version = "0.4.22"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "num-traits"
|
||||||
|
version = "0.2.19"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
|
||||||
|
dependencies = [
|
||||||
|
"autocfg",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "once_cell"
|
||||||
|
version = "1.20.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pico-args"
|
||||||
|
version = "0.5.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5be167a7af36ee22fe3115051bc51f6e6c7054c9348e28deb4f49bd6f705a315"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "proc-macro2"
|
||||||
|
version = "1.0.92"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0"
|
||||||
|
dependencies = [
|
||||||
|
"unicode-ident",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "quote"
|
||||||
|
version = "1.0.37"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rayon"
|
||||||
|
version = "1.10.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa"
|
||||||
|
dependencies = [
|
||||||
|
"either",
|
||||||
|
"rayon-core",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rayon-core"
|
||||||
|
version = "1.12.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2"
|
||||||
|
dependencies = [
|
||||||
|
"crossbeam-deque",
|
||||||
|
"crossbeam-utils",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "roxmltree"
|
||||||
|
version = "0.20.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "6c20b6793b5c2fa6553b250154b78d6d0db37e72700ae35fad9387a46f487c97"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "shlex"
|
||||||
|
version = "1.3.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "syn"
|
||||||
|
version = "2.0.91"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d53cbcb5a243bd33b7858b1d7f4aca2153490815872d86d955d6ea29f743c035"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"unicode-ident",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "unicode-ident"
|
||||||
|
version = "1.0.14"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "wasm-bindgen"
|
||||||
|
version = "0.2.99"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a474f6281d1d70c17ae7aa6a613c87fce69a127e2624002df63dcb39d6cf6396"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
"once_cell",
|
||||||
|
"wasm-bindgen-macro",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "wasm-bindgen-backend"
|
||||||
|
version = "0.2.99"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5f89bb38646b4f81674e8f5c3fb81b562be1fd936d84320f3264486418519c79"
|
||||||
|
dependencies = [
|
||||||
|
"bumpalo",
|
||||||
|
"log",
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
"wasm-bindgen-shared",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "wasm-bindgen-macro"
|
||||||
|
version = "0.2.99"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2cc6181fd9a7492eef6fef1f33961e3695e4579b9872a6f7c83aee556666d4fe"
|
||||||
|
dependencies = [
|
||||||
|
"quote",
|
||||||
|
"wasm-bindgen-macro-support",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "wasm-bindgen-macro-support"
|
||||||
|
version = "0.2.99"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "30d7a95b763d3c45903ed6c81f156801839e5ee968bb07e534c44df0fcd330c2"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
"wasm-bindgen-backend",
|
||||||
|
"wasm-bindgen-shared",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "wasm-bindgen-shared"
|
||||||
|
version = "0.2.99"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "943aab3fdaaa029a6e0271b35ea10b72b943135afe9bffca82384098ad0e06a6"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows-core"
|
||||||
|
version = "0.52.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9"
|
||||||
|
dependencies = [
|
||||||
|
"windows-targets",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows-targets"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
|
||||||
|
dependencies = [
|
||||||
|
"windows_aarch64_gnullvm",
|
||||||
|
"windows_aarch64_msvc",
|
||||||
|
"windows_i686_gnu",
|
||||||
|
"windows_i686_gnullvm",
|
||||||
|
"windows_i686_msvc",
|
||||||
|
"windows_x86_64_gnu",
|
||||||
|
"windows_x86_64_gnullvm",
|
||||||
|
"windows_x86_64_msvc",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_aarch64_gnullvm"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_aarch64_msvc"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_i686_gnu"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_i686_gnullvm"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_i686_msvc"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_x86_64_gnu"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_x86_64_gnullvm"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_x86_64_msvc"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
|
12
tools/grapheme-table-gen/Cargo.toml
Normal file
12
tools/grapheme-table-gen/Cargo.toml
Normal file
|
@ -0,0 +1,12 @@
|
||||||
|
[package]
|
||||||
|
name = "grapheme-table-gen"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2021"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
anyhow = "1.0.95"
|
||||||
|
chrono = "0.4.39"
|
||||||
|
indoc = "2.0.5"
|
||||||
|
pico-args = { version = "0.5.0", features = ["eq-separator"] }
|
||||||
|
rayon = "1.10.0"
|
||||||
|
roxmltree = { version = "0.20.0", default-features = false, features = ["std"] }
|
850
tools/grapheme-table-gen/src/main.rs
Normal file
850
tools/grapheme-table-gen/src/main.rs
Normal file
|
@ -0,0 +1,850 @@
|
||||||
|
mod rules;
|
||||||
|
|
||||||
|
use crate::rules::{JOIN_RULES_GRAPHEME_CLUSTER, JOIN_RULES_LINE_BREAK};
|
||||||
|
use anyhow::{bail, Context};
|
||||||
|
use indoc::writedoc;
|
||||||
|
use rayon::prelude::*;
|
||||||
|
use std::collections::HashMap;
|
||||||
|
use std::fmt::Write as FmtWrite;
|
||||||
|
use std::io::Write as IoWrite;
|
||||||
|
use std::ops::RangeInclusive;
|
||||||
|
use std::path::PathBuf;
|
||||||
|
|
||||||
|
// Integer type alias; presumably the type of a packed per-code-point trie value — TODO confirm, its uses lie outside this excerpt.
type TrieType = u32;
|
||||||
|
|
||||||
|
/// Display width class assigned to a code point.
///
/// The discriminants are spelled out because the generated
/// `ucd_grapheme_cluster_character_width` helper returns a 2-bit number,
/// presumably one of these values — confirm against `trie_value`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
enum CharacterWidth {
    ZeroWidth = 0,
    Narrow = 1,
    Wide = 2,
    Ambiguous = 3,
}
|
||||||
|
|
||||||
|
/// Grapheme cluster break class of a code point. Each variant is annotated
/// with the UAX #29 grapheme boundary rules (GBn) it participates in.
///
/// The numeric order is significant: the generated
/// `ucd_grapheme_cluster_is_newline` helper tests `value > Control`, so `CR`
/// and `LF` must be the only variants after `Control`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
enum ClusterBreak {
    /// GB999
    Other = 0,
    /// GB9, GB9a -- includes SpacingMark
    Extend = 1,
    /// GB12, GB13
    RI = 2,
    /// GB9b
    Prepend = 3,
    /// GB6, GB7, GB8
    HangulL = 4,
    /// GB6, GB7, GB8
    HangulV = 5,
    /// GB6, GB7, GB8
    HangulT = 6,
    /// GB6, GB7, GB8
    HangulLV = 7,
    /// GB6, GB7, GB8
    HangulLVT = 8,
    /// GB9c
    InCBLinker = 9,
    /// GB9c
    InCBConsonant = 10,
    /// GB11
    ExtPic = 11,
    /// GB9, GB11
    ZWJ = 12,

    // The remaining variants are deliberately placed at the end, which keeps
    // the ucd_grapheme_cluster_is_newline implementation a single comparison.
    /// GB4, GB5
    Control = 13,
    /// GB3, GB4, GB5
    CR = 14,
    /// GB3, GB4, GB5
    LF = 15,
}
|
||||||
|
|
||||||
|
/// Line breaking class of a code point. The two-letter codes in the variant
/// docs are the class abbreviations given in the original comments.
///
/// Discriminants are written out explicitly; they follow declaration order.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
#[allow(non_camel_case_types)]
enum LineBreak {
    /// Anything else
    Other = 0,

    // --- Non-tailorable Line Breaking Classes ---
    /// WJ
    WordJoiner = 1,
    /// ZW
    ZeroWidthSpace = 2,
    /// GL
    Glue = 3,
    /// SP
    Space = 4,

    // --- Break Opportunities ---
    /// BA
    BreakAfter = 5,
    /// BB
    BreakBefore = 6,
    /// HY
    Hyphen = 7,

    // --- Characters Prohibiting Certain Breaks ---
    /// CL
    ClosePunctuation = 8,
    /// CP, East Asian
    CloseParenthesis_EA = 9,
    /// CP, not East Asian
    CloseParenthesis_NotEA = 10,
    /// EX
    Exclamation = 11,
    /// IN
    Inseparable = 12,
    /// NS
    Nonstarter = 13,
    /// OP, East Asian
    OpenPunctuation_EA = 14,
    /// OP, not East Asian
    OpenPunctuation_NotEA = 15,
    /// QU
    Quotation = 16,

    // --- Numeric Context ---
    /// IS
    InfixNumericSeparator = 17,
    /// NU
    Numeric = 18,
    /// PO
    PostfixNumeric = 19,
    /// PR
    PrefixNumeric = 20,
    /// SY
    SymbolsAllowingBreakAfter = 21,

    // --- Other Characters ---
    /// AL & HL
    Alphabetic = 22,
    /// ID & EB & EM
    Ideographic = 23,
}
|
||||||
|
|
||||||
|
/// Data extracted from the UCD XML document.
#[derive(Clone, Default)]
struct Ucd {
    /// Text of the document's `description` element (empty if it has none).
    description: String,
    /// One packed value per code point, indexed by the code point itself.
    values: Vec<u32>,
}
|
||||||
|
|
||||||
|
/// A single stage (level) of the multi-stage lookup trie.
///
/// A lookup at this stage reads `values[previous + ((cp >> shift) & mask)]`,
/// where `previous` is the value read from the preceding stage (0 for the
/// first stage).
#[derive(Clone, Default)]
struct Stage {
    /// Contents of this stage's table.
    values: Vec<u32>,
    /// Position of the stage within the trie; used to name the emitted array.
    index: usize,
    /// Right shift applied to the code point before masking.
    shift: usize,
    /// Mask applied to the shifted code point.
    mask: usize,
    /// Bit width of the integer type used for the emitted array elements.
    bits: usize,
}
|
||||||
|
|
||||||
|
#[derive(Clone, Default)]
|
||||||
|
struct Trie {
|
||||||
|
stages: Vec<Stage>,
|
||||||
|
total_size: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Target language of the generated source code (`--lang`).
#[derive(Clone, Copy, Default)]
enum Language {
    /// Emit C; this is the default.
    #[default]
    C,
    /// Emit Rust.
    Rust,
}
|
||||||
|
|
||||||
|
#[derive(Default)]
|
||||||
|
struct Output {
|
||||||
|
arg_lang: Language,
|
||||||
|
arg_no_ambiguous: bool,
|
||||||
|
arg_line_breaks: bool,
|
||||||
|
|
||||||
|
ucd: Ucd,
|
||||||
|
trie: Trie,
|
||||||
|
rules_gc: [Vec<u32>; 2],
|
||||||
|
rules_lb: Vec<u32>,
|
||||||
|
total_size: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Usage text printed for `-h` / `--help`.
const HELP: &str = concat!(
    "Usage: grapheme-table-gen [options...] <ucd.nounihan.grouped.xml>\n",
    "-h, --help Prints help information\n",
    "--lang=<c|rust> Output language (default: c)\n",
    "--no-ambiguous Treat all ambiguous characters as narrow\n",
    "--line-breaks Store and expose line break information\n",
);
|
||||||
|
|
||||||
|
fn main() -> anyhow::Result<()> {
|
||||||
|
let mut args = pico_args::Arguments::from_env();
|
||||||
|
if args.contains(["-h", "--help"]) {
|
||||||
|
eprint!("{}", HELP);
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut out = Output {
|
||||||
|
arg_lang: args.value_from_fn("--lang", |arg| match arg {
|
||||||
|
"c" => Ok(Language::C),
|
||||||
|
"rust" => Ok(Language::Rust),
|
||||||
|
l => bail!("invalid language: \"{}\"", l),
|
||||||
|
})?,
|
||||||
|
arg_no_ambiguous: args.contains("--no-ambiguous"),
|
||||||
|
arg_line_breaks: args.contains("--line-breaks"),
|
||||||
|
..Default::default()
|
||||||
|
};
|
||||||
|
let arg_input = args.free_from_os_str(|s| -> Result<PathBuf, &'static str> { Ok(s.into()) })?;
|
||||||
|
let arg_remaining = args.finish();
|
||||||
|
if !arg_remaining.is_empty() {
|
||||||
|
bail!("unrecognized arguments: {:?}", arg_remaining);
|
||||||
|
}
|
||||||
|
|
||||||
|
let input = std::fs::read_to_string(arg_input)?;
|
||||||
|
let doc = roxmltree::Document::parse(&input)?;
|
||||||
|
out.ucd = extract_values_from_ucd(&doc, &out)?;
|
||||||
|
|
||||||
|
// Find the best trie configuration over the given block sizes (2^2 - 2^8) and stages (4).
|
||||||
|
// More stages = Less size. The trajectory roughly follows a+b*c^stages, where c < 1.
|
||||||
|
// 4 still gives ~30% savings over 3 stages and going beyond 5 gives diminishing returns (<10%).
|
||||||
|
out.trie = build_best_trie(&out.ucd.values, 2, 8, 4);
|
||||||
|
// The joinRules above has 2 bits per value. This packs it into 32-bit integers to save space.
|
||||||
|
out.rules_gc = JOIN_RULES_GRAPHEME_CLUSTER
|
||||||
|
.map(|t| t.iter().map(|row| prepare_rules_row(row, 2, 3)).collect());
|
||||||
|
out.rules_lb = JOIN_RULES_LINE_BREAK
|
||||||
|
.iter()
|
||||||
|
.map(|row| prepare_rules_row(row, 1, 0))
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
// Each rules item has the same length. Each item is 32 bits = 4 bytes.
|
||||||
|
out.total_size = out.trie.total_size + out.rules_gc.len() * out.rules_gc[0].len() * 4;
|
||||||
|
if out.arg_line_breaks {
|
||||||
|
out.total_size += out.rules_lb.len() * 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run a quick sanity check to ensure that the trie works as expected.
|
||||||
|
for (cp, &expected) in out.ucd.values.iter().enumerate() {
|
||||||
|
let mut actual = 0;
|
||||||
|
for s in &out.trie.stages {
|
||||||
|
actual = s.values[actual as usize + ((cp >> s.shift) & s.mask)];
|
||||||
|
}
|
||||||
|
assert_eq!(
|
||||||
|
expected, actual,
|
||||||
|
"trie sanity check failed for U+{:04X}",
|
||||||
|
cp
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
let buf = match out.arg_lang {
|
||||||
|
Language::C => generate_c(out),
|
||||||
|
Language::Rust => generate_rust(out),
|
||||||
|
};
|
||||||
|
|
||||||
|
std::io::stdout().write_all(buf.as_bytes())?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Output {
|
||||||
|
fn args(&self) -> String {
|
||||||
|
let mut buf = String::new();
|
||||||
|
match self.arg_lang {
|
||||||
|
Language::C => buf.push_str("--lang=c"),
|
||||||
|
Language::Rust => buf.push_str("--lang=rust"),
|
||||||
|
}
|
||||||
|
if self.arg_no_ambiguous {
|
||||||
|
buf.push_str(" --no-ambiguous")
|
||||||
|
}
|
||||||
|
if self.arg_line_breaks {
|
||||||
|
buf.push_str(" --line-breaks")
|
||||||
|
}
|
||||||
|
buf
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn generate_c(out: Output) -> String {
|
||||||
|
let mut buf = String::new();
|
||||||
|
|
||||||
|
_ = writedoc!(
|
||||||
|
buf,
|
||||||
|
"
|
||||||
|
// BEGIN: Generated by grapheme-table-gen on {}, from {}, with {}, {} bytes
|
||||||
|
// clang-format off
|
||||||
|
",
|
||||||
|
chrono::Utc::now().to_rfc3339_opts(chrono::SecondsFormat::Secs, true),
|
||||||
|
out.ucd.description,
|
||||||
|
out.args(),
|
||||||
|
out.total_size,
|
||||||
|
);
|
||||||
|
|
||||||
|
for stage in &out.trie.stages {
|
||||||
|
let mut width = 16;
|
||||||
|
if stage.index != 0 {
|
||||||
|
width = stage.mask + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
_ = write!(
|
||||||
|
buf,
|
||||||
|
"static const uint{}_t s_stage{}[] = {{",
|
||||||
|
stage.bits, stage.index
|
||||||
|
);
|
||||||
|
for (j, &value) in stage.values.iter().enumerate() {
|
||||||
|
if j % width == 0 {
|
||||||
|
buf.push_str("\n ");
|
||||||
|
}
|
||||||
|
_ = write!(buf, " 0x{:01$x},", value, stage.bits / 4);
|
||||||
|
}
|
||||||
|
buf.push_str("\n};\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
_ = writeln!(
|
||||||
|
buf,
|
||||||
|
"static const uint32_t s_grapheme_cluster_join_rules[{}][{}] = {{",
|
||||||
|
out.rules_gc.len(),
|
||||||
|
out.rules_gc[0].len()
|
||||||
|
);
|
||||||
|
for table in &out.rules_gc {
|
||||||
|
buf.push_str(" {\n");
|
||||||
|
for &r in table {
|
||||||
|
_ = writeln!(buf, " 0b{:032b},", r);
|
||||||
|
}
|
||||||
|
buf.push_str(" },\n");
|
||||||
|
}
|
||||||
|
buf.push_str("};\n");
|
||||||
|
|
||||||
|
if out.arg_line_breaks {
|
||||||
|
_ = writeln!(
|
||||||
|
buf,
|
||||||
|
"static const uint32_t s_line_break_join_rules[{}] = {{",
|
||||||
|
out.rules_lb.len()
|
||||||
|
);
|
||||||
|
for r in &out.rules_lb {
|
||||||
|
_ = writeln!(buf, " 0b{r:032b},");
|
||||||
|
}
|
||||||
|
buf.push_str("};\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
buf.push_str("inline int ucd_grapheme_cluster_lookup(const uint32_t cp)\n{\n");
|
||||||
|
for stage in &out.trie.stages {
|
||||||
|
if stage.index == 0 {
|
||||||
|
_ = writeln!(
|
||||||
|
buf,
|
||||||
|
" const uint{}_t s0 = s_stage0[cp >> {}];",
|
||||||
|
stage.bits, stage.shift,
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
_ = writeln!(
|
||||||
|
buf,
|
||||||
|
" const uint{}_t s{} = s_stage{}[s{} + ((cp >> {}) & {})];",
|
||||||
|
stage.bits,
|
||||||
|
stage.index,
|
||||||
|
stage.index,
|
||||||
|
stage.index - 1,
|
||||||
|
stage.shift,
|
||||||
|
stage.mask,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ = writeln!(buf, " return s{};", out.trie.stages.len() - 1);
|
||||||
|
buf.push_str("}\n");
|
||||||
|
|
||||||
|
_ = writedoc!(
|
||||||
|
buf,
|
||||||
|
"
|
||||||
|
inline int ucd_grapheme_cluster_joins(const int state, const int lead, const int trail)
|
||||||
|
{{
|
||||||
|
const int l = lead & 15;
|
||||||
|
const int t = trail & 15;
|
||||||
|
return (s_grapheme_cluster_join_rules[state][l] >> (t * 2)) & 3;
|
||||||
|
}}
|
||||||
|
inline bool ucd_grapheme_cluster_joins_done(const int state)
|
||||||
|
{{
|
||||||
|
return state == 3;
|
||||||
|
}}
|
||||||
|
inline int ucd_grapheme_cluster_character_width(const int val)
|
||||||
|
{{
|
||||||
|
return (val >> 4) & 3;
|
||||||
|
}}
|
||||||
|
inline bool ucd_grapheme_cluster_is_newline(const int val)
|
||||||
|
{{
|
||||||
|
return (val & 15) > {};
|
||||||
|
}}
|
||||||
|
",
|
||||||
|
ClusterBreak::Control as u32,
|
||||||
|
);
|
||||||
|
|
||||||
|
if out.arg_line_breaks {
|
||||||
|
_ = writedoc!(
|
||||||
|
buf,
|
||||||
|
"
|
||||||
|
inline bool ucd_line_break_joins(const int lead, const int trail)
|
||||||
|
{{
|
||||||
|
const int l = lead >> 6;
|
||||||
|
const int t = trail >> 6;
|
||||||
|
return (s_line_break_join_rules[l] >> t) & 1;
|
||||||
|
}}
|
||||||
|
",
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
buf.push_str("// clang-format on\n// END: Generated by grapheme-table-gen\n");
|
||||||
|
buf
|
||||||
|
}
|
||||||
|
|
||||||
|
fn generate_rust(out: Output) -> String {
|
||||||
|
let mut buf = String::new();
|
||||||
|
|
||||||
|
_ = writeln!(
|
||||||
|
buf,
|
||||||
|
"// BEGIN: Generated by grapheme-table-gen on {}, from {}, with {}, {} bytes",
|
||||||
|
chrono::Utc::now().to_rfc3339_opts(chrono::SecondsFormat::Secs, true),
|
||||||
|
out.ucd.description,
|
||||||
|
out.args(),
|
||||||
|
out.total_size,
|
||||||
|
);
|
||||||
|
|
||||||
|
for stage in &out.trie.stages {
|
||||||
|
let mut width = 16;
|
||||||
|
if stage.index != 0 {
|
||||||
|
width = stage.mask + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
_ = write!(
|
||||||
|
buf,
|
||||||
|
"#[rustfmt::skip]\npub const STAGE{}: [u{}; {}] = [",
|
||||||
|
stage.index,
|
||||||
|
stage.bits,
|
||||||
|
stage.values.len(),
|
||||||
|
);
|
||||||
|
for (j, &value) in stage.values.iter().enumerate() {
|
||||||
|
if j % width == 0 {
|
||||||
|
buf.push_str("\n ");
|
||||||
|
}
|
||||||
|
_ = write!(buf, " 0x{:01$x},", value, stage.bits / 4);
|
||||||
|
}
|
||||||
|
buf.push_str("\n];\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
_ = writeln!(
|
||||||
|
buf,
|
||||||
|
"#[rustfmt::skip]\npub const GRAPHEME_JOIN_RULES: [[u32; {}]; {}] = [",
|
||||||
|
out.rules_gc[0].len(),
|
||||||
|
out.rules_gc.len(),
|
||||||
|
);
|
||||||
|
for table in &out.rules_gc {
|
||||||
|
buf.push_str(" [\n");
|
||||||
|
for &r in table {
|
||||||
|
_ = writeln!(buf, " 0b{:032b},", r);
|
||||||
|
}
|
||||||
|
buf.push_str(" ],\n");
|
||||||
|
}
|
||||||
|
buf.push_str("];\n");
|
||||||
|
|
||||||
|
if out.arg_line_breaks {
|
||||||
|
_ = writeln!(
|
||||||
|
buf,
|
||||||
|
"#[rustfmt::skip]\npub const LINE_BREAK_JOIN_RULES: [u32; {}] = [",
|
||||||
|
out.rules_lb.len(),
|
||||||
|
);
|
||||||
|
for r in &out.rules_lb {
|
||||||
|
_ = writeln!(buf, " 0b{r:032b},");
|
||||||
|
}
|
||||||
|
buf.push_str("];\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
_ = writedoc!(
|
||||||
|
buf,
|
||||||
|
"
|
||||||
|
#[inline(always)]
|
||||||
|
pub fn ucd_grapheme_cluster_lookup(cp: char) -> usize {{
|
||||||
|
let cp = cp as usize;
|
||||||
|
",
|
||||||
|
);
|
||||||
|
for stage in &out.trie.stages {
|
||||||
|
if stage.index == 0 {
|
||||||
|
_ = writeln!(
|
||||||
|
buf,
|
||||||
|
" let s = STAGE{}[cp >> {}] as usize;",
|
||||||
|
stage.index, stage.shift,
|
||||||
|
);
|
||||||
|
} else if stage.index != out.trie.stages.len() - 1 {
|
||||||
|
_ = writeln!(
|
||||||
|
buf,
|
||||||
|
" let s = STAGE{}[s + (cp & {})] as usize;",
|
||||||
|
stage.index, stage.mask,
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
_ = writeln!(
|
||||||
|
buf,
|
||||||
|
" STAGE{}[s + ((cp >> {}) & {})] as usize",
|
||||||
|
stage.index, stage.shift, stage.mask,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
buf.push_str("}\n");
|
||||||
|
|
||||||
|
_ = writedoc!(
|
||||||
|
buf,
|
||||||
|
"
|
||||||
|
#[inline(always)]
|
||||||
|
pub fn ucd_grapheme_cluster_joins(state: u32, lead: usize, trail: usize) -> u32 {{
|
||||||
|
let l = lead & 15;
|
||||||
|
let t = trail & 15;
|
||||||
|
(GRAPHEME_JOIN_RULES[state as usize][l] >> (t * 2)) & 3
|
||||||
|
}}
|
||||||
|
#[inline(always)]
|
||||||
|
pub fn ucd_grapheme_cluster_joins_done(state: u32) -> bool {{
|
||||||
|
state == 3
|
||||||
|
}}
|
||||||
|
#[inline(always)]
|
||||||
|
pub fn ucd_grapheme_cluster_character_width(val: usize) -> usize {{
|
||||||
|
(val >> 4) & 3
|
||||||
|
}}
|
||||||
|
#[inline(always)]
|
||||||
|
pub fn ucd_grapheme_cluster_is_newline(val: usize) -> bool {{
|
||||||
|
(val & 15) > {}
|
||||||
|
}}
|
||||||
|
",
|
||||||
|
ClusterBreak::Control as u32,
|
||||||
|
);
|
||||||
|
|
||||||
|
if out.arg_line_breaks {
|
||||||
|
_ = writedoc!(
|
||||||
|
buf,
|
||||||
|
"
|
||||||
|
#[inline(always)]
|
||||||
|
pub fn ucd_line_break_joins(lead: usize, trail: usize) -> bool {{
|
||||||
|
let l = lead >> 6;
|
||||||
|
let t = trail >> 6;
|
||||||
|
((LINE_BREAK_JOIN_RULES[l] >> t) & 1) != 0
|
||||||
|
}}
|
||||||
|
",
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
buf.push_str("// END: Generated by grapheme-table-gen\n");
|
||||||
|
buf
|
||||||
|
}
|
||||||
|
|
||||||
|
fn extract_values_from_ucd(doc: &roxmltree::Document, out: &Output) -> anyhow::Result<Ucd> {
|
||||||
|
let ambiguous_value = if out.arg_no_ambiguous {
|
||||||
|
CharacterWidth::Narrow
|
||||||
|
} else {
|
||||||
|
CharacterWidth::Ambiguous
|
||||||
|
};
|
||||||
|
let mut values = vec![
|
||||||
|
trie_value(
|
||||||
|
ClusterBreak::Other,
|
||||||
|
CharacterWidth::Narrow,
|
||||||
|
LineBreak::Other
|
||||||
|
);
|
||||||
|
1114112
|
||||||
|
];
|
||||||
|
|
||||||
|
let ns = "http://www.unicode.org/ns/2003/ucd/1.0";
|
||||||
|
let root = doc.root_element();
|
||||||
|
let description = root
|
||||||
|
.children()
|
||||||
|
.find(|n| n.has_tag_name((ns, "description")))
|
||||||
|
.context("missing ucd description")?;
|
||||||
|
let repertoire = root
|
||||||
|
.children()
|
||||||
|
.find(|n| n.has_tag_name((ns, "repertoire")))
|
||||||
|
.context("missing ucd repertoire")?;
|
||||||
|
let description = description.text().unwrap_or_default().to_string();
|
||||||
|
|
||||||
|
for group in repertoire.children().filter(|n| n.is_element()) {
|
||||||
|
const DEFAULT_ATTRIBUTES: UcdAttributes = UcdAttributes {
|
||||||
|
general_category: "",
|
||||||
|
line_break: "",
|
||||||
|
grapheme_cluster_break: "",
|
||||||
|
indic_conjunct_break: "",
|
||||||
|
extended_pictographic: "",
|
||||||
|
east_asian: "",
|
||||||
|
};
|
||||||
|
let group_attributes = extract_attributes(&group, &DEFAULT_ATTRIBUTES);
|
||||||
|
|
||||||
|
for char in group.children().filter(|n| n.is_element()) {
|
||||||
|
let char_attributes = extract_attributes(&char, &group_attributes);
|
||||||
|
let range = extract_range(&char);
|
||||||
|
|
||||||
|
let mut cb = match char_attributes.grapheme_cluster_break {
|
||||||
|
"XX" => ClusterBreak::Other, // Anything else
|
||||||
|
// We ignore GB3 which demands that CR × LF do not break apart, because
|
||||||
|
// * these control characters won't normally reach our text storage
|
||||||
|
// * otherwise we're in a raw write mode and historically conhost stores them in separate cells
|
||||||
|
"CR" => ClusterBreak::CR, // Carriage Return
|
||||||
|
"LF" => ClusterBreak::LF, // Line Feed
|
||||||
|
"CN" => ClusterBreak::Control, // Control
|
||||||
|
"EX" | "SM" => ClusterBreak::Extend, // Extend, SpacingMark
|
||||||
|
"PP" => ClusterBreak::Prepend, // Prepend
|
||||||
|
"ZWJ" => ClusterBreak::ZWJ, // Zero Width Joiner
|
||||||
|
"RI" => ClusterBreak::RI, // Regional Indicator
|
||||||
|
"L" => ClusterBreak::HangulL, // Hangul Syllable Type L
|
||||||
|
"V" => ClusterBreak::HangulV, // Hangul Syllable Type V
|
||||||
|
"T" => ClusterBreak::HangulT, // Hangul Syllable Type T
|
||||||
|
"LV" => ClusterBreak::HangulLV, // Hangul Syllable Type LV
|
||||||
|
"LVT" => ClusterBreak::HangulLVT, // Hangul Syllable Type LVT
|
||||||
|
_ => bail!(
|
||||||
|
"Unrecognized GCB {:?} for U+{:04X} to U+{:04X}",
|
||||||
|
char_attributes.grapheme_cluster_break,
|
||||||
|
range.start(),
|
||||||
|
range.end()
|
||||||
|
),
|
||||||
|
};
|
||||||
|
|
||||||
|
if char_attributes.extended_pictographic == "Y" {
|
||||||
|
// Currently every single Extended_Pictographic codepoint happens to be GCB=XX.
|
||||||
|
// This is fantastic for us because it means we can stuff it into the ClusterBreak enum
|
||||||
|
// and treat it as an alias of EXTEND, but with the special GB11 properties.
|
||||||
|
if cb != ClusterBreak::Other {
|
||||||
|
bail!(
|
||||||
|
"Unexpected GCB {:?} with ExtPict=Y for U+{:04X} to U+{:04X}",
|
||||||
|
char_attributes.grapheme_cluster_break,
|
||||||
|
range.start(),
|
||||||
|
range.end()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
cb = ClusterBreak::ExtPic;
|
||||||
|
}
|
||||||
|
|
||||||
|
cb = match char_attributes.indic_conjunct_break {
|
||||||
|
"None" | "Extend" => cb,
|
||||||
|
"Linker" => ClusterBreak::InCBLinker,
|
||||||
|
"Consonant" => ClusterBreak::InCBConsonant,
|
||||||
|
_ => bail!(
|
||||||
|
"Unrecognized InCB {:?} for U+{:04X} to U+{:04X}",
|
||||||
|
char_attributes.indic_conjunct_break,
|
||||||
|
range.start(),
|
||||||
|
range.end()
|
||||||
|
),
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut width = match char_attributes.east_asian {
|
||||||
|
"N" | "Na" | "H" => CharacterWidth::Narrow, // Half-width, Narrow, Neutral
|
||||||
|
"F" | "W" => CharacterWidth::Wide, // Wide, Full-width
|
||||||
|
"A" => ambiguous_value, // Ambiguous
|
||||||
|
_ => bail!(
|
||||||
|
"Unrecognized ea {:?} for U+{:04X} to U+{:04X}",
|
||||||
|
char_attributes.east_asian,
|
||||||
|
range.start(),
|
||||||
|
range.end()
|
||||||
|
),
|
||||||
|
};
|
||||||
|
|
||||||
|
// There's no "ea" attribute for "zero width" so we need to do that ourselves. This matches:
|
||||||
|
// Me: Mark, enclosing
|
||||||
|
// Mn: Mark, non-spacing
|
||||||
|
// Cf: Control, format
|
||||||
|
match char_attributes.general_category {
|
||||||
|
"Cf" if cb == ClusterBreak::Control => {
|
||||||
|
// A significant portion of Cf characters are not just gc=Cf (= commonly considered zero-width),
|
||||||
|
// but also GCB=CN (= does not join). This is a bit of a problem for terminals,
|
||||||
|
// because they don't support zero-width graphemes, as zero-width columns can't exist.
|
||||||
|
// So, we turn all of them into Extend, which is roughly how wcswidth() would treat them.
|
||||||
|
cb = ClusterBreak::Extend;
|
||||||
|
width = CharacterWidth::ZeroWidth;
|
||||||
|
}
|
||||||
|
"Me" | "Mn" | "Cf" => {
|
||||||
|
width = CharacterWidth::ZeroWidth;
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
};
|
||||||
|
|
||||||
|
let lb = if out.arg_line_breaks {
|
||||||
|
let lb_ea = matches!(char_attributes.east_asian, "F" | "W" | "H");
|
||||||
|
match char_attributes.line_break {
|
||||||
|
"WJ" => LineBreak::WordJoiner,
|
||||||
|
"ZW" => LineBreak::ZeroWidthSpace,
|
||||||
|
"GL" => LineBreak::Glue,
|
||||||
|
"SP" => LineBreak::Space,
|
||||||
|
|
||||||
|
"BA" => LineBreak::BreakAfter,
|
||||||
|
"BB" => LineBreak::BreakBefore,
|
||||||
|
"HY" => LineBreak::Hyphen,
|
||||||
|
|
||||||
|
"CL" => LineBreak::ClosePunctuation,
|
||||||
|
"CP" if lb_ea => LineBreak::CloseParenthesis_EA,
|
||||||
|
"CP" => LineBreak::CloseParenthesis_NotEA,
|
||||||
|
"EX" => LineBreak::Exclamation,
|
||||||
|
"IN" => LineBreak::Inseparable,
|
||||||
|
"NS" => LineBreak::Nonstarter,
|
||||||
|
"OP" if lb_ea => LineBreak::OpenPunctuation_EA,
|
||||||
|
"OP" => LineBreak::OpenPunctuation_NotEA,
|
||||||
|
"QU" => LineBreak::Quotation,
|
||||||
|
|
||||||
|
"IS" => LineBreak::InfixNumericSeparator,
|
||||||
|
"NU" => LineBreak::Numeric,
|
||||||
|
"PO" => LineBreak::PostfixNumeric,
|
||||||
|
"PR" => LineBreak::PrefixNumeric,
|
||||||
|
"SY" => LineBreak::SymbolsAllowingBreakAfter,
|
||||||
|
|
||||||
|
"AL" | "HL" => LineBreak::Alphabetic,
|
||||||
|
"ID" | "EB" | "EM" => LineBreak::Ideographic,
|
||||||
|
|
||||||
|
_ => LineBreak::Other,
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
LineBreak::Other
|
||||||
|
};
|
||||||
|
|
||||||
|
values[range].fill(trie_value(cb, width, lb));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// U+00AD: Soft Hyphen
|
||||||
|
// A soft hyphen is a hint that a word break is allowed at that position.
|
||||||
|
// By default, the glyph is supposed to be invisible, and only if
|
||||||
|
// a word break occurs, the text renderer should display a hyphen.
|
||||||
|
// A terminal does not support computerized typesetting, but unlike the other
|
||||||
|
// gc=Cf cases we give it a Narrow width, because that matches wcswidth().
|
||||||
|
values[0x00AD] = trie_value_mod_width(values[0x00AD], CharacterWidth::Narrow);
|
||||||
|
|
||||||
|
// U+2500 to U+257F: Box Drawing block
|
||||||
|
// U+2580 to U+259F: Block Elements block
|
||||||
|
// By default, CharacterWidth.Ambiguous, but by convention .Narrow in terminals.
|
||||||
|
//
|
||||||
|
// Most of these characters are LineBreak.Other, but some are actually LineBreak.Alphabetic.
|
||||||
|
// But to us this doesn't really matter much, because it doesn't make much sense anyway that
|
||||||
|
// a light double dash is "alphabetic" while a light triple dash is not.
|
||||||
|
values[0x2500..=0x259F].fill(trie_value(
|
||||||
|
ClusterBreak::Other,
|
||||||
|
CharacterWidth::Narrow,
|
||||||
|
LineBreak::Other,
|
||||||
|
));
|
||||||
|
|
||||||
|
// U+FE0F Variation Selector-16 is used to turn unqualified Emojis into qualified ones.
|
||||||
|
// By convention, this turns them from being ambiguous width (= narrow) into wide ones.
|
||||||
|
// We achieve this here by explicitly giving this codepoint a wide width.
|
||||||
|
// Later down below we'll clamp width back to <= 2.
|
||||||
|
//
|
||||||
|
// U+FE0F actually has a LineBreak property of CM (Combining Mark),
|
||||||
|
// but for us that's equivalent to Other.
|
||||||
|
values[0xFE0F] = trie_value_mod_width(values[0xFE0F], CharacterWidth::Wide);
|
||||||
|
|
||||||
|
Ok(Ucd {
|
||||||
|
description,
|
||||||
|
values,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The raw UCD XML attribute values this generator cares about, borrowed from the document.
///
/// An empty string stands for "attribute not set" (see the defaults used for groups).
struct UcdAttributes<'a> {
    /// `gc` attribute.
    general_category: &'a str,
    /// `lb` attribute.
    line_break: &'a str,
    /// `GCB` attribute.
    grapheme_cluster_break: &'a str,
    /// `InCB` attribute.
    indic_conjunct_break: &'a str,
    /// `ExtPict` attribute.
    extended_pictographic: &'a str,
    /// `ea` attribute.
    east_asian: &'a str,
}
|
||||||
|
|
||||||
|
fn extract_attributes<'a>(
|
||||||
|
node: &'a roxmltree::Node,
|
||||||
|
default: &'a UcdAttributes,
|
||||||
|
) -> UcdAttributes<'a> {
|
||||||
|
UcdAttributes {
|
||||||
|
general_category: node.attribute("gc").unwrap_or(default.general_category),
|
||||||
|
line_break: node.attribute("lb").unwrap_or(default.line_break),
|
||||||
|
grapheme_cluster_break: node
|
||||||
|
.attribute("GCB")
|
||||||
|
.unwrap_or(default.grapheme_cluster_break),
|
||||||
|
indic_conjunct_break: node
|
||||||
|
.attribute("InCB")
|
||||||
|
.unwrap_or(default.indic_conjunct_break),
|
||||||
|
extended_pictographic: node
|
||||||
|
.attribute("ExtPict")
|
||||||
|
.unwrap_or(default.extended_pictographic),
|
||||||
|
east_asian: node.attribute("ea").unwrap_or(default.east_asian),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn extract_range(node: &roxmltree::Node) -> RangeInclusive<usize> {
|
||||||
|
let (first, last) = match node.attribute("cp") {
|
||||||
|
Some(val) => {
|
||||||
|
let cp = usize::from_str_radix(val, 16).unwrap();
|
||||||
|
(cp, cp)
|
||||||
|
}
|
||||||
|
None => (
|
||||||
|
usize::from_str_radix(node.attribute("first-cp").unwrap_or("0"), 16).unwrap(),
|
||||||
|
usize::from_str_radix(node.attribute("last-cp").unwrap_or("0"), 16).unwrap(),
|
||||||
|
),
|
||||||
|
};
|
||||||
|
first..=last
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Packs the three per-codepoint properties into a single trie value:
/// the cluster-break class in the low bits, the width shifted left by 4
/// and the line-break class shifted left by 6.
fn trie_value(cb: ClusterBreak, width: CharacterWidth, lb: LineBreak) -> TrieType {
    (cb as TrieType) | ((width as TrieType) << 4) | ((lb as TrieType) << 6)
}
|
||||||
|
|
||||||
|
/// Returns `value` with its two width bits (bits 4 and 5) replaced by `width`.
/// All other bits are left untouched.
fn trie_value_mod_width(value: TrieType, width: CharacterWidth) -> TrieType {
    const WIDTH_SHIFT: u32 = 4;
    const WIDTH_MASK: TrieType = 3 << WIDTH_SHIFT;

    let without_width = value & !WIDTH_MASK;
    without_width | ((width as TrieType) << WIDTH_SHIFT)
}
|
||||||
|
|
||||||
|
fn build_best_trie(
|
||||||
|
uncompressed: &[TrieType],
|
||||||
|
min_shift: usize,
|
||||||
|
max_shift: usize,
|
||||||
|
stages: usize,
|
||||||
|
) -> Trie {
|
||||||
|
let depth = stages - 1;
|
||||||
|
let delta = max_shift - min_shift + 1;
|
||||||
|
let total = delta.pow(depth as u32);
|
||||||
|
|
||||||
|
let mut tasks = Vec::new();
|
||||||
|
for i in 0..total {
|
||||||
|
let mut shifts = vec![0; depth];
|
||||||
|
let mut index = i;
|
||||||
|
for s in &mut shifts {
|
||||||
|
*s = min_shift + (index % delta);
|
||||||
|
index /= delta;
|
||||||
|
}
|
||||||
|
tasks.push(shifts);
|
||||||
|
}
|
||||||
|
|
||||||
|
tasks
|
||||||
|
.par_iter()
|
||||||
|
.map(|shifts| build_trie(uncompressed.to_vec(), shifts))
|
||||||
|
.min_by_key(|t| t.total_size)
|
||||||
|
.unwrap()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn build_trie(mut uncompressed: Vec<TrieType>, shifts: &[usize]) -> Trie {
|
||||||
|
let mut cumulative_shift = 0;
|
||||||
|
let mut stages = Vec::new();
|
||||||
|
|
||||||
|
for &shift in shifts.iter() {
|
||||||
|
let chunk_size = 1 << shift;
|
||||||
|
let mut cache = HashMap::new();
|
||||||
|
let mut compressed = Vec::new();
|
||||||
|
let mut offsets = Vec::new();
|
||||||
|
|
||||||
|
for off in (0..uncompressed.len()).step_by(chunk_size) {
|
||||||
|
let chunk = &uncompressed[off..off + chunk_size.min(uncompressed.len() - off)];
|
||||||
|
let offset = cache.entry(chunk).or_insert_with(|| {
|
||||||
|
if let Some(existing) = find_existing(&compressed, chunk) {
|
||||||
|
existing as TrieType
|
||||||
|
} else {
|
||||||
|
let overlap = measure_overlap(&compressed, chunk);
|
||||||
|
compressed.extend_from_slice(&chunk[overlap..]);
|
||||||
|
(compressed.len() - chunk.len()) as TrieType
|
||||||
|
}
|
||||||
|
});
|
||||||
|
offsets.push(*offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
stages.push(Stage {
|
||||||
|
values: compressed,
|
||||||
|
index: shifts.len() - stages.len(),
|
||||||
|
shift: cumulative_shift,
|
||||||
|
mask: chunk_size - 1,
|
||||||
|
bits: 0,
|
||||||
|
});
|
||||||
|
|
||||||
|
uncompressed = offsets;
|
||||||
|
cumulative_shift += shift;
|
||||||
|
}
|
||||||
|
|
||||||
|
stages.push(Stage {
|
||||||
|
values: uncompressed,
|
||||||
|
index: 0,
|
||||||
|
shift: cumulative_shift,
|
||||||
|
mask: usize::MAX,
|
||||||
|
bits: 0,
|
||||||
|
});
|
||||||
|
|
||||||
|
stages.reverse();
|
||||||
|
|
||||||
|
for stage in stages.iter_mut() {
|
||||||
|
let max_val = stage.values.iter().max().cloned().unwrap_or(0);
|
||||||
|
stage.bits = match max_val {
|
||||||
|
0..0x100 => 8,
|
||||||
|
0x100..0x10000 => 16,
|
||||||
|
_ => 32,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
let total_size: usize = stages
|
||||||
|
.iter()
|
||||||
|
.map(|stage| (stage.bits / 8) * stage.values.len())
|
||||||
|
.sum();
|
||||||
|
|
||||||
|
Trie { stages, total_size }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the offset of the first occurrence of `needle` within `haystack`, if any.
///
/// An empty `needle` is reported at offset 0. It needs an explicit check, because
/// `slice::windows` panics when asked for windows of size zero.
fn find_existing(haystack: &[TrieType], needle: &[TrieType]) -> Option<usize> {
    if needle.is_empty() {
        return Some(0);
    }

    haystack
        .windows(needle.len())
        .position(|window| window == needle)
}
|
||||||
|
|
||||||
|
/// Returns the length of the longest suffix of `prev` that is also a prefix of `next`.
///
/// The search includes the full length of the shorter slice: the previous exclusive range
/// stopped one short, so e.g. a `prev` that is entirely a prefix of `next` was reported
/// as having a smaller (or no) overlap.
fn measure_overlap(prev: &[TrieType], next: &[TrieType]) -> usize {
    let longest = prev.len().min(next.len());

    // Longest candidate first; a length of 0 always matches, so the search cannot fail.
    (0..=longest)
        .rev()
        .find(|&len| prev.ends_with(&next[..len]))
        .unwrap_or(0)
}
|
||||||
|
|
||||||
|
/// Packs one row of a join-rule table into a `u32` bit field.
///
/// The cell for trailing index `trail` occupies `bit_width` bits starting at bit
/// `trail * bit_width`. Negative cells (the "does not join" marker) are stored as
/// `non_joiner_value`.
fn prepare_rules_row(row: &[i32], bit_width: usize, non_joiner_value: i32) -> u32 {
    let mut packed = 0u32;
    for (trail, &cell) in row.iter().enumerate() {
        let resolved = if cell >= 0 { cell } else { non_joiner_value };
        packed |= (resolved as u32) << (trail * bit_width);
    }
    packed
}
|
279
tools/grapheme-table-gen/src/rules.rs
Normal file
279
tools/grapheme-table-gen/src/rules.rs
Normal file
|
@ -0,0 +1,279 @@
|
||||||
|
// Used as an indicator in our rules for ÷ ("does not join").
// Underscore is one of the few characters that are permitted as an identifier,
// are monospace in most fonts and also visually distinct from the digits.
const X: i32 = -1;

// The following rules are based on the Grapheme Cluster Boundaries section of Unicode Standard Annex #29,
// but slightly modified to allow for use with a plain MxN lookup table.
//
// Break at the start and end of text, unless the text is empty.
//   GB1:   ~ sot ÷ Any
//   GB2:   ~ Any ÷ eot
//          Handled by our ucd_* functions.
//
// Do not break between a CR and LF. Otherwise, break before and after controls.
//   GB3:   ✓ CR × LF
//   GB4:   ✓ (Control | CR | LF) ÷
//   GB5:   ✓ ÷ (Control | CR | LF)
//
// Do not break Hangul syllable or other conjoining sequences.
//   GB6:   ✓ L × (L | V | LV | LVT)
//   GB7:   ✓ (LV | V) × (V | T)
//   GB8:   ✓ (LVT | T) × T
//
// Do not break before extending characters or ZWJ.
//   GB9:   ✓ × (Extend | ZWJ)
//
// Do not break before SpacingMarks, or after Prepend characters.
//   GB9a:  ✓ × SpacingMark
//   GB9b:  ✓ Prepend ×
//
// Do not break within certain combinations with Indic_Conjunct_Break (InCB)=Linker.
//   GB9c:  ~ \p{InCB=Linker} × \p{InCB=Consonant}
//            × \p{InCB=Linker}
//          modified from
//            \p{InCB=Consonant} [ \p{InCB=Extend} \p{InCB=Linker} ]* \p{InCB=Linker} [ \p{InCB=Extend} \p{InCB=Linker} ]* × \p{InCB=Consonant}
//          because this has almost the same effect from what I can tell for most text, and greatly simplifies our design.
//
// Do not break within emoji modifier sequences or emoji zwj sequences.
//   GB11:  ~ ZWJ × \p{Extended_Pictographic} modified from \p{Extended_Pictographic} Extend* ZWJ × \p{Extended_Pictographic}
//          because this allows us to use LUTs, while working for most valid text.
//
// Do not break within emoji flag sequences. That is, do not break between regional indicator (RI) symbols if there is an odd number of RI characters before the break point.
//   GB12:  ~ sot (RI RI)* RI × RI
//   GB13:  ~ [^RI] (RI RI)* RI × RI
//          the lookup table we generate supports RIs via something akin to RI ÷ RI × RI ÷ RI, but the corresponding
//          grapheme cluster algorithm doesn't count them. It would need to be updated to recognize and special-case RIs.
//
// Otherwise, break everywhere.
//   GB999: ✓ Any ÷ Any
//
// This is a great reference for the resulting table:
//   https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.html
//
// Layout: [table][leading class][trailing class]. Rows and columns of BOTH tables use the
// same class order (listed in the column legend below), because both are indexed with the
// same class values. A cell of X means "break"; a non-negative cell means "join" and
// names the table to continue with (0 = base table, 1 = the table used after an RI pair).
#[rustfmt::skip]
pub const JOIN_RULES_GRAPHEME_CLUSTER: [[[i32; 16]; 16]; 2] = [
    // Base table
    [
        /* ↓ leading          → trailing codepoint, in this column order:                          */
        /*                      Other, Extend, RI, Prepend, HangulL, HangulV, HangulT, HangulLV,   */
        /*                      HangulLVT, InCBLinker, InCBConsonant, ExtPic, ZWJ, Control, CR, LF */
        /* Other         */ [X, 0, X, X, X, X, X, X, X, 0, X, X, 0, X, X, X],
        /* Extend        */ [X, 0, X, X, X, X, X, X, X, 0, X, X, 0, X, X, X],
        /* RI            */ [X, 0, 1, X, X, X, X, X, X, 0, X, X, 0, X, X, X],
        /* Prepend       */ [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, X, X, X],
        /* HangulL       */ [X, 0, X, X, 0, 0, X, 0, 0, 0, X, X, 0, X, X, X],
        /* HangulV       */ [X, 0, X, X, X, 0, 0, X, X, 0, X, X, 0, X, X, X],
        /* HangulT       */ [X, 0, X, X, X, X, 0, X, X, 0, X, X, 0, X, X, X],
        /* HangulLV      */ [X, 0, X, X, X, 0, 0, X, X, 0, X, X, 0, X, X, X],
        /* HangulLVT     */ [X, 0, X, X, X, X, 0, X, X, 0, X, X, 0, X, X, X],
        /* InCBLinker    */ [X, 0, X, X, X, X, X, X, X, 0, 0, X, 0, X, X, X],
        /* InCBConsonant */ [X, 0, X, X, X, X, X, X, X, 0, X, X, 0, X, X, X],
        /* ExtPic        */ [X, 0, X, X, X, X, X, X, X, 0, X, X, 0, X, X, X],
        /* ZWJ           */ [X, 0, X, X, X, X, X, X, X, 0, X, 0, 0, X, X, X],
        /* Control       */ [X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X],
        /* CR            */ [X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, 0],
        /* LF            */ [X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X],
    ],
    // Once we have encountered a Regional Indicator pair we'll enter this table.
    // It's a copy of the base table, but instead of RI × RI, we're RI ÷ RI.
    //
    // This table previously listed its rows and columns in a different order
    // (Other, CR, LF, Control, Extend, ...) than the base table, so lookups with the
    // shared class indices hit the wrong cells. It now uses the base table's order.
    [
        /* ↓ leading          → trailing codepoint, in this column order:                          */
        /*                      Other, Extend, RI, Prepend, HangulL, HangulV, HangulT, HangulLV,   */
        /*                      HangulLVT, InCBLinker, InCBConsonant, ExtPic, ZWJ, Control, CR, LF */
        /* Other         */ [X, 0, X, X, X, X, X, X, X, 0, X, X, 0, X, X, X],
        /* Extend        */ [X, 0, X, X, X, X, X, X, X, 0, X, X, 0, X, X, X],
        /* RI            */ [X, 0, X, X, X, X, X, X, X, 0, X, X, 0, X, X, X],
        /* Prepend       */ [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, X, X, X],
        /* HangulL       */ [X, 0, X, X, 0, 0, X, 0, 0, 0, X, X, 0, X, X, X],
        /* HangulV       */ [X, 0, X, X, X, 0, 0, X, X, 0, X, X, 0, X, X, X],
        /* HangulT       */ [X, 0, X, X, X, X, 0, X, X, 0, X, X, 0, X, X, X],
        /* HangulLV      */ [X, 0, X, X, X, 0, 0, X, X, 0, X, X, 0, X, X, X],
        /* HangulLVT     */ [X, 0, X, X, X, X, 0, X, X, 0, X, X, 0, X, X, X],
        /* InCBLinker    */ [X, 0, X, X, X, X, X, X, X, 0, 0, X, 0, X, X, X],
        /* InCBConsonant */ [X, 0, X, X, X, X, X, X, X, 0, X, X, 0, X, X, X],
        /* ExtPic        */ [X, 0, X, X, X, X, X, X, X, 0, X, X, 0, X, X, X],
        /* ZWJ           */ [X, 0, X, X, X, X, X, X, X, 0, X, 0, 0, X, X, X],
        /* Control       */ [X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X],
        /* CR            */ [X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, 0],
        /* LF            */ [X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X],
    ],
];
|
||||||
|
|
||||||
|
// The following rules are based on Unicode Standard Annex #14: Line Breaking Properties,
|
||||||
|
// but heavily modified to allow for use with lookup tables.
|
||||||
|
//
|
||||||
|
// NOTE: If you convert these rules into a lookup table, you must apply them in reverse order.
|
||||||
|
// This is because the rules are ordered from most to least important (e.g. LB8 overrides LB18).
|
||||||
|
//
|
||||||
|
// Resolve line breaking classes:
|
||||||
|
// LB1: Assign a line breaking class [...].
|
||||||
|
// ✗ Unicode does that for us via the "lb" attribute.
|
||||||
|
//
|
||||||
|
// Start and end of text:
|
||||||
|
// LB2: Never break at the start of text.
|
||||||
|
// ~ Functionality not needed.
|
||||||
|
// LB3: Always break at the end of text.
|
||||||
|
// ~ Functionality not needed.
|
||||||
|
//
|
||||||
|
// Mandatory breaks:
|
||||||
|
// LB4: Always break after hard line breaks.
|
||||||
|
// ~ Handled by our ucd_* functions.
|
||||||
|
// LB5: Treat CR followed by LF, as well as CR, LF, and NL as hard line breaks.
|
||||||
|
// ~ Handled by our ucd_* functions.
|
||||||
|
// LB6: Do not break before hard line breaks.
|
||||||
|
// ~ Handled by our ucd_* functions.
|
||||||
|
//
|
||||||
|
// Explicit breaks and non-breaks:
|
||||||
|
// LB7: Do not break before spaces or zero width space.
|
||||||
|
// ✗ It's way simpler to treat spaces as if they always break.
|
||||||
|
// LB8: Break before any character following a zero-width space, even if one or more spaces intervene.
|
||||||
|
// ~ ZW ÷ modified from ZW SP* ÷ because that level of accuracy is not worth the added complexity here.
|
||||||
|
// LB8a: Do not break after a zero width joiner.
|
||||||
|
// ~ Our ucd_* functions never break within grapheme clusters.
|
||||||
|
//
|
||||||
|
// Combining marks:
|
||||||
|
// LB9: Do not break a combining character sequence; treat it as if it has the line breaking class of the base character in all of the following rules. Treat ZWJ as if it were CM.
|
||||||
|
// ~ Our ucd_* functions never break within grapheme clusters.
|
||||||
|
// LB10: Treat any remaining combining mark or ZWJ as AL.
|
||||||
|
// ✗ To be honest, I'm not entirely sure I understand the implications of this rule.
|
||||||
|
//
|
||||||
|
// Word joiner:
|
||||||
|
// LB11: Do not break before or after Word joiner and related characters.
|
||||||
|
// ✓ × WJ
|
||||||
|
// ✓ WJ ×
|
||||||
|
//
|
||||||
|
// Non-breaking characters:
|
||||||
|
// LB12: Do not break after NBSP and related characters.
|
||||||
|
// ✓ GL ×
|
||||||
|
// LB12a: Do not break before NBSP and related characters, except after spaces and hyphens.
|
||||||
|
// ✓ [^SP BA HY] × GL
|
||||||
|
//
|
||||||
|
// Opening and closing:
|
||||||
|
// LB13: Do not break before ']' or '!' or '/', even after spaces.
|
||||||
|
// ✓ × CL
|
||||||
|
// ✓ × CP
|
||||||
|
// ✓ × EX
|
||||||
|
// ✓ × SY
|
||||||
|
// LB14: Do not break after '[', even after spaces.
|
||||||
|
// ~ OP × modified from OP SP* × just because it's simpler. It would be nice to address this.
|
||||||
|
// LB15a: Do not break after an unresolved initial punctuation that lies at the start of the line, after a space, after opening punctuation, or after an unresolved quotation mark, even after spaces.
|
||||||
|
// ✗ Not implemented. Seemed too complex for little gain?
|
||||||
|
// LB15b: Do not break before an unresolved final punctuation that lies at the end of the line, before a space, before a prohibited break, or before an unresolved quotation mark, even after spaces.
|
||||||
|
// ✗ Not implemented. Seemed too complex for little gain?
|
||||||
|
// LB15c: Break before a decimal mark that follows a space, for instance, in 'subtract .5'.
|
||||||
|
// ~ SP ÷ IS modified from SP ÷ IS NU because this fits neatly with LB15d.
|
||||||
|
// LB15d: Otherwise, do not break before ';', ',', or '.', even after spaces.
|
||||||
|
// ✓ × IS
|
||||||
|
// LB16: Do not break between closing punctuation and a nonstarter (lb=NS), even with intervening spaces.
|
||||||
|
// ✗ Not implemented. Could be useful in the future, but its usefulness seemed limited to me.
|
||||||
|
// LB17: Do not break within '——', even with intervening spaces.
|
||||||
|
// ✗ Not implemented. Neither terminal applications nor code use em-dashes much anyway.
|
||||||
|
//
|
||||||
|
// Spaces:
|
||||||
|
// LB18: Break after spaces.
|
||||||
|
// ✗ Implemented because we didn't implement LB7.
|
||||||
|
//
|
||||||
|
// Special case rules:
|
||||||
|
// LB19: Do not break before non-initial unresolved quotation marks, such as ' ” ' or ' " ', nor after non-final unresolved quotation marks, such as ' “ ' or ' " '.
|
||||||
|
// ~ × QU modified from × [ QU - \p{Pi} ]
|
||||||
|
// ~ QU × modified from [ QU - \p{Pf} ] ×
|
||||||
|
// We implement the Unicode 16.0 instead of 16.1 rules, because it's simpler and allows us to use a LUT.
|
||||||
|
// LB19a: Unless surrounded by East Asian characters, do not break either side of any unresolved quotation marks.
|
||||||
|
// ✗ [^$EastAsian] × QU
|
||||||
|
// ✗ × QU ( [^$EastAsian] | eot )
|
||||||
|
// ✗ QU × [^$EastAsian]
|
||||||
|
// ✗ ( sot | [^$EastAsian] ) QU ×
|
||||||
|
// Same as LB19.
|
||||||
|
// LB20: Break before and after unresolved CB.
|
||||||
|
// ✗ We break by default. Unicode inline objects are super irrelevant in a terminal in either case.
|
||||||
|
// LB20a: Do not break after a word-initial hyphen.
|
||||||
|
// ✗ Not implemented. Seemed not worth the hassle as the window will almost always be >1 char wide.
|
||||||
|
// LB21: Do not break before hyphen-minus, other hyphens, fixed-width spaces, small kana, and other non-starters, or after acute accents.
|
||||||
|
// ✓ × BA
|
||||||
|
// ✓ × HY
|
||||||
|
// ✓ × NS
|
||||||
|
// ✓ BB ×
|
||||||
|
// LB21a: Do not break after the hyphen in Hebrew + Hyphen + non-Hebrew.
|
||||||
|
// ✗ Not implemented. Perhaps in the future.
|
||||||
|
// LB21b: Do not break between Solidus and Hebrew letters.
|
||||||
|
// ✗ Not implemented. Perhaps in the future.
|
||||||
|
// LB22: Do not break before ellipses.
|
||||||
|
// ✓ × IN
|
||||||
|
//
|
||||||
|
// Numbers:
|
||||||
|
// LB23: Do not break between digits and letters.
|
||||||
|
// ✓ (AL | HL) × NU
|
||||||
|
// ✓ NU × (AL | HL)
|
||||||
|
// LB23a: Do not break between numeric prefixes and ideographs, or between ideographs and numeric postfixes.
|
||||||
|
// ✓ PR × (ID | EB | EM)
|
||||||
|
// ✓ (ID | EB | EM) × PO
|
||||||
|
// LB24: Do not break between numeric prefix/postfix and letters, or between letters and prefix/postfix.
|
||||||
|
// ✓ (PR | PO) × (AL | HL)
|
||||||
|
// ✓ (AL | HL) × (PR | PO)
|
||||||
|
// LB25: Do not break numbers:
|
||||||
|
// ~ CL × PO modified from NU ( SY | IS )* CL × PO
|
||||||
|
// ~ CP × PO modified from NU ( SY | IS )* CP × PO
|
||||||
|
// ~ CL × PR modified from NU ( SY | IS )* CL × PR
|
||||||
|
// ~ CP × PR modified from NU ( SY | IS )* CP × PR
|
||||||
|
// ~ ( NU | SY | IS ) × PO modified from NU ( SY | IS )* × PO
|
||||||
|
// ~ ( NU | SY | IS ) × PR modified from NU ( SY | IS )* × PR
|
||||||
|
// ~ PO × OP modified from PO × OP NU
|
||||||
|
// ~ PO × OP modified from PO × OP IS NU
|
||||||
|
// ✓ PO × NU
|
||||||
|
// ~ PR × OP modified from PR × OP NU
|
||||||
|
// ~ PR × OP modified from PR × OP IS NU
|
||||||
|
// ✓ PR × NU
|
||||||
|
// ✓ HY × NU
|
||||||
|
// ✓ IS × NU
|
||||||
|
// ~ ( NU | SY | IS ) × NU modified from NU ( SY | IS )* × NU
|
||||||
|
// Most were simplified because the cases this additionally allows don't matter much here.
|
||||||
|
//
|
||||||
|
// Korean syllable blocks
|
||||||
|
// LB26: Do not break a Korean syllable.
|
||||||
|
// ✗ Our ucd_* functions never break within grapheme clusters.
|
||||||
|
// LB27: Treat a Korean Syllable Block the same as ID.
|
||||||
|
// ✗ Our ucd_* functions never break within grapheme clusters.
|
||||||
|
//
|
||||||
|
// Finally, join alphabetic letters into words and break everything else.
|
||||||
|
// LB28: Do not break between alphabetics ("at").
|
||||||
|
// ✓ (AL | HL) × (AL | HL)
|
||||||
|
// LB28a: Do not break inside the orthographic syllables of Brahmic scripts.
|
||||||
|
// ✗ Our ucd_* functions never break within grapheme clusters.
|
||||||
|
// LB29: Do not break between numeric punctuation and alphabetics ("e.g.").
|
||||||
|
// ✓ IS × (AL | HL)
|
||||||
|
// LB30: Do not break between letters, numbers, or ordinary symbols and opening or closing parentheses.
|
||||||
|
// ✓ (AL | HL | NU) × [OP-$EastAsian]
|
||||||
|
// ✓ [CP-$EastAsian] × (AL | HL | NU)
|
||||||
|
// LB30a: Break between two regional indicator symbols if and only if there are an even number of regional indicators preceding the position of the break.
|
||||||
|
// ✗ Our ucd_* functions never break within grapheme clusters.
|
||||||
|
// LB30b: Do not break between an emoji base (or potential emoji) and an emoji modifier.
|
||||||
|
// ✗ Our ucd_* functions never break within grapheme clusters.
|
||||||
|
// LB31: Break everywhere else.
|
||||||
|
// ✗ Our default behavior.
|
||||||
|
// Pairwise join table implementing the line-break rules listed in the comment above.
//
// Layout: the outer index (row) is the class of the leading codepoint and the inner
// index (column) is the class of the trailing codepoint. Rows and columns use the same
// order of 24 classes, spelled out in the column header comment below.
//
// Values: `1` marks a pair covered by a "×" (do not break) rule above, e.g.
// Alphabetic → Numeric for LB23. `X` is defined outside this excerpt; it fills e.g. the
// whole ZeroWidthSpace row, so it presumably marks a break opportunity — TODO confirm
// against its definition.
//
// `#[rustfmt::skip]` keeps rustfmt from reflowing the one-row-per-line table layout.
#[rustfmt::skip]
|
||||||
|
pub const JOIN_RULES_LINE_BREAK: [[i32; 24]; 24] = [
|
||||||
|
/* ↓ leading → trailing codepoint */
|
||||||
|
/* | Other | WordJoiner | ZeroWidthSpace | Glue | Space | BreakAfter | BreakBefore | Hyphen | ClosePunctuation | CloseParenthesis_EA | CloseParenthesis_NotEA | Exclamation | Inseparable | Nonstarter | OpenPunctuation_EA | OpenPunctuation_NotEA | Quotation | InfixNumericSeparator | Numeric | PostfixNumeric | PrefixNumeric | SymbolsAllowingBreakAfter | Alphabetic | Ideographic | */
|
||||||
|
/* Other | */ [X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, X /* | */, X /* | */, 1 /* | */, 1 /* | */, X /* | */, X /* | */, X /* | */, 1 /* | */, X /* | */, X /* | */],
|
||||||
|
/* WordJoiner | */ [1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */],
|
||||||
|
/* ZeroWidthSpace | */ [X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */],
|
||||||
|
/* Glue | */ [1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */],
|
||||||
|
/* Space | */ [X /* | */, 1 /* | */, X /* | */, X /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, X /* | */, X /* | */, 1 /* | */, X /* | */, X /* | */, X /* | */, X /* | */, 1 /* | */, X /* | */, X /* | */],
|
||||||
|
/* BreakAfter | */ [X /* | */, 1 /* | */, X /* | */, X /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, X /* | */, X /* | */, 1 /* | */, 1 /* | */, X /* | */, X /* | */, X /* | */, 1 /* | */, X /* | */, X /* | */],
|
||||||
|
/* BreakBefore | */ [1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */],
|
||||||
|
/* Hyphen | */ [X /* | */, 1 /* | */, X /* | */, X /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, X /* | */, X /* | */, 1 /* | */, 1 /* | */, 1 /* | */, X /* | */, X /* | */, 1 /* | */, X /* | */, X /* | */],
|
||||||
|
/* ClosePunctuation | */ [X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, X /* | */, X /* | */, 1 /* | */, 1 /* | */, X /* | */, 1 /* | */, 1 /* | */, 1 /* | */, X /* | */, X /* | */],
|
||||||
|
/* CloseParenthesis_EA | */ [X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, X /* | */, X /* | */, 1 /* | */, 1 /* | */, X /* | */, 1 /* | */, 1 /* | */, 1 /* | */, X /* | */, X /* | */],
|
||||||
|
/* CloseParenthesis_NotEA | */ [X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, X /* | */, X /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, X /* | */],
|
||||||
|
/* Exclamation | */ [X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, X /* | */, X /* | */, 1 /* | */, 1 /* | */, X /* | */, X /* | */, X /* | */, 1 /* | */, X /* | */, X /* | */],
|
||||||
|
/* Inseparable | */ [X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, X /* | */, X /* | */, 1 /* | */, 1 /* | */, X /* | */, X /* | */, X /* | */, 1 /* | */, X /* | */, X /* | */],
|
||||||
|
/* Nonstarter | */ [X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, X /* | */, X /* | */, 1 /* | */, 1 /* | */, X /* | */, X /* | */, X /* | */, 1 /* | */, X /* | */, X /* | */],
|
||||||
|
/* OpenPunctuation_EA | */ [1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */],
|
||||||
|
/* OpenPunctuation_NotEA | */ [1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */],
|
||||||
|
/* Quotation | */ [1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */],
|
||||||
|
/* InfixNumericSeparator | */ [X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, X /* | */, X /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, X /* | */],
|
||||||
|
/* Numeric | */ [X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, X /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, X /* | */],
|
||||||
|
/* PostfixNumeric | */ [X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, X /* | */, X /* | */, 1 /* | */, 1 /* | */, X /* | */],
|
||||||
|
/* PrefixNumeric | */ [X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, X /* | */, X /* | */, 1 /* | */, 1 /* | */, 1 /* | */],
|
||||||
|
/* SymbolsAllowingBreakAfter | */ [X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, X /* | */, X /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, X /* | */, X /* | */],
|
||||||
|
/* Alphabetic | */ [X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, X /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, X /* | */],
|
||||||
|
/* Ideographic | */ [X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, X /* | */, X /* | */, 1 /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, X /* | */],
|
||||||
|
];
|
Loading…
Add table
Add a link
Reference in a new issue