From f654981a07a1408e370a1b373f612b6bfde5c374 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Wed, 19 Mar 2025 03:13:50 +0100 Subject: [PATCH] Initial import --- .github/workflows/build.yml | 48 + .gitignore | 8 + .vscode/launch.json | 17 + .vscode/tasks.json | 24 + Cargo.lock | 90 + Cargo.toml | 34 + LICENSE | 21 + README.md | 3 + assets/Microsoft_logo_(1980).svg | 26 + assets/microsoft.png | Bin 0 -> 5775 bytes assets/microsoft.sixel | 1 + build.rs | 12 + src/apperr.rs | 80 + src/buffer.rs | 2299 +++++++++++++++++++ src/edit.exe.manifest | 22 + src/framebuffer.rs | 536 +++++ src/fuzzy.rs | 234 ++ src/helpers.rs | 382 ++++ src/icu.rs | 1027 +++++++++ src/input.rs | 488 ++++ src/loc.rs | 663 ++++++ src/main.rs | 1067 +++++++++ src/memchr.rs | 491 ++++ src/sys.rs | 10 + src/sys/unix.rs | 353 +++ src/sys/windows.rs | 524 +++++ src/trust_me_bro.rs | 7 + src/tui.rs | 2958 +++++++++++++++++++++++++ src/ucd.rs | 705 ++++++ src/ucd_gen.rs | 1066 +++++++++ src/utf8.rs | 217 ++ src/vt.rs | 319 +++ tools/build_release_windows.bat | 11 + tools/grapheme-table-gen/Cargo.lock | 380 ++++ tools/grapheme-table-gen/Cargo.toml | 12 + tools/grapheme-table-gen/src/main.rs | 850 +++++++ tools/grapheme-table-gen/src/rules.rs | 279 +++ 37 files changed, 15264 insertions(+) create mode 100644 .github/workflows/build.yml create mode 100644 .gitignore create mode 100644 .vscode/launch.json create mode 100644 .vscode/tasks.json create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 LICENSE create mode 100644 README.md create mode 100644 assets/Microsoft_logo_(1980).svg create mode 100644 assets/microsoft.png create mode 100644 assets/microsoft.sixel create mode 100644 build.rs create mode 100644 src/apperr.rs create mode 100644 src/buffer.rs create mode 100644 src/edit.exe.manifest create mode 100644 src/framebuffer.rs create mode 100644 src/fuzzy.rs create mode 100644 src/helpers.rs create mode 100644 src/icu.rs create mode 100644 src/input.rs create mode 100644 
src/loc.rs create mode 100644 src/main.rs create mode 100644 src/memchr.rs create mode 100644 src/sys.rs create mode 100644 src/sys/unix.rs create mode 100644 src/sys/windows.rs create mode 100644 src/trust_me_bro.rs create mode 100644 src/tui.rs create mode 100644 src/ucd.rs create mode 100644 src/ucd_gen.rs create mode 100644 src/utf8.rs create mode 100644 src/vt.rs create mode 100644 tools/build_release_windows.bat create mode 100644 tools/grapheme-table-gen/Cargo.lock create mode 100644 tools/grapheme-table-gen/Cargo.toml create mode 100644 tools/grapheme-table-gen/src/main.rs create mode 100644 tools/grapheme-table-gen/src/rules.rs diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 0000000..62ded9a --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,48 @@ +name: build + +on: + push: + branches: + - main + +env: + CARGO_TERM_COLOR: always + +jobs: + build: + runs-on: windows-2022 + strategy: + matrix: + toolchain: + - nightly + arch: + - x64 + - arm64 + steps: + # The Windows runners have autocrlf enabled by default. 
+ - name: Disable git autocrlf + run: git config --global core.autocrlf false + - name: Checkout + uses: actions/checkout@v4 + - name: Install nightly + run: | + rustup toolchain install --no-self-update --profile minimal --component rust-src -- nightly + rustup default nightly + rustup target add ${{ matrix.arch == 'arm64' && 'aarch64-pc-windows-msvc' || 'x86_64-pc-windows-msvc' }} + - name: Test + if: matrix.arch == 'x64' + run: cargo test + - name: Build + run: | + if ("${{ matrix.arch }}" -eq "arm64") { + .\tools\build_release_windows.bat --target aarch64-pc-windows-msvc + } else { + .\tools\build_release_windows.bat + } + - name: Upload + uses: actions/upload-artifact@v4 + with: + name: Windows ${{ matrix.arch }} + path: | + ${{ github.workspace }}/target/${{ matrix.arch == 'arm64' && 'aarch64-pc-windows-msvc/release' || 'release' }}/edit.exe + ${{ github.workspace }}/target/${{ matrix.arch == 'arm64' && 'aarch64-pc-windows-msvc/release' || 'release' }}/edit.pdb diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b7c9270 --- /dev/null +++ b/.gitignore @@ -0,0 +1,8 @@ +.idea +.vs +*.user +bin +CMakeSettings.json +obj +out +target diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..f907882 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,17 @@ +{ + "version": "0.2.0", + "configurations": [ + { + "name": "Launch Debug", + "preLaunchTask": "rust: cargo build", + "type": "cppvsdbg", + "request": "launch", + "console": "externalTerminal", + "program": "${workspaceFolder}/target/debug/edit", + "args": [ + "${workspaceFolder}/README.md" + ], + "cwd": "${workspaceFolder}", + } + ] +} diff --git a/.vscode/tasks.json b/.vscode/tasks.json new file mode 100644 index 0000000..daeaf8c --- /dev/null +++ b/.vscode/tasks.json @@ -0,0 +1,24 @@ +{ + "version": "2.0.0", + "tasks": [ + { + "label": "rust: cargo build", + "type": "process", + "command": "cargo", + "args": [ + "build", + "--package", + "edit", + 
"--features", + "debug-latency" + ], + "group": { + "kind": "build", + "isDefault": true + }, + "problemMatcher": [ + "$rustc" + ] + } + ] +} diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..10ed248 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,90 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "edit" +version = "0.1.0" +dependencies = [ + "libc", + "windows-sys", +] + +[[package]] +name = "libc" +version = "0.2.171" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6" + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = 
"windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..81d9b94 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,34 @@ +[package] +name = "edit" +version = "0.1.0" +edition = "2024" + +[features] +debug-layout = [] +debug-latency = [] + +[profile.release] +codegen-units = 1 +debug = "full" +lto = true +panic = "abort" +debug-assertions = true # Temporary while I test this + +[dependencies] + +[target.'cfg(unix)'.dependencies] +libc = "0.2" + +[target.'cfg(windows)'.dependencies.windows-sys] +version = "0.59" +features = [ + "Win32_Globalization", + "Win32_Security", + "Win32_Storage_FileSystem", + "Win32_System_Console", + "Win32_System_Diagnostics_Debug", + "Win32_System_IO", + "Win32_System_LibraryLoader", + "Win32_System_Memory", + "Win32_System_Threading", +] diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..88040d8 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) Microsoft Corporation. 
All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/README.md b/README.md new file mode 100644 index 0000000..6328e65 --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +# MS-DOS Editor Redux + +TBA diff --git a/assets/Microsoft_logo_(1980).svg b/assets/Microsoft_logo_(1980).svg new file mode 100644 index 0000000..473c8d5 --- /dev/null +++ b/assets/Microsoft_logo_(1980).svg @@ -0,0 +1,26 @@ + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + diff --git a/assets/microsoft.png b/assets/microsoft.png new file mode 100644 index 0000000000000000000000000000000000000000..1092d4c74a2f016988c20816533d6a3938317d62 GIT binary patch literal 5775 zcmV;A7I5i_P) zaB^>EX>4U6ba`-PAZ2)IW&i+q+O3*bawNGGMgOr1EdlPqEC)71JJ9lVE{f#z%yj5O z5;@H(7BgX`|reQ)u-2TvLk>HPaKm}8D=e0nITtz~{{jMW> z0sj8}>lcY-kjzyIb7OkICgYNkK4+n^ne~QXx4lYHseS;awXoem~eetnqQJ-$Mk!i797f zEapJQt1>`AN9D}7kaJY#oO0%Sq$@14N>g%nP{zn$J|Wr-Ke_uObAQX5qwoLQyv1KL z=ajntN9LST_nXZ9J#W9s+7`Yq|GLq=Hhc(}Xg@AEZN$bE{D^b+bO;e}Q7>_yo=va} zqMCQKd`^qIFKyb@8DhG#UxpcUSsG2X(JBKZZtEv91aG1*_PLq8Rl={3?X3G|xXazs zcU>nfdeM`iQ|2zLpWl?5NjOhzyEdQJewdzaTvqq;txYmHVr*o|g=Y5poG2WvWW~}V z;VXO0+h(${`LWinx6=IG=C$0&YA!MCxlQ{G+xb#wUra^bZmaVOEc2+b_EHmd+ z&n+aEtYw(8+(;dS1?rtX+a0yAqH{aZzK2K<)UwOTtSi?OL(Ah(9yx6-tJ(zEd!J&U zaul6>=FN#{Tz6wUXViP`bTIa?o|%AJq95~3E17O6eoWr%kZ1O+a*+#zW|*;rT`3qB zSnTzey{|FWD6lCzo@?fGZ1_MLPwr>=J9@Bl?nsJ2uH3XI#gO|>#M7;v$mGd{DWui; zkM;Ff2ld&?K5^EY(i+*%Q&R6?ZZ<|c9TpwNJ(23)qq?J-I3aJQ%QMpRk%~R1)366p zY@x*w!mXe{3B;%-!nUMMsM@~#>hwW8j%=K&+T@DCH8oPdrNIkCnaBb0V$_umQDqVN zti_(lPd~ZRV0D=>k}D7eC1N@H%iGEB`rT!orD|9d`w8o|0z-RWZz-hi_*w}FE83f8 zjbtoEt zQ)cX419=`ciBp9GZy%*CUJUl|tAGlNO&?dgc>Yq1cD9eVu0+mluI;I64}i@=mW)E0 zD$Tu1Al@R^-AG5>gc1aDQAk$^6o{}4)x@2LEPj*;Dc(G~lKE;!_t zcnf#b<966W;b>z73Xnq0WDWp^*;l81YTJoQqn;2)p02rbhC|wof{as}qtTPAEtR^h zq_(M`*%^1AZE(vTKtF}TJygB*ISX(`WHw>Vvn2iwD>SOJgyIYo8v~?9-hncy>H>v- ztR!gUrxXy-#Ad(+XmQ85+hko8WtRz7XUUF)v9aS1DI!Vm0p)op%8<{X-s9eTm3arK zTQXP2GnFf#$){g6MwTif0VPkdn| z1bYH_z?vZCcACke^f#Mk#rzm(WuRmg3fCr;Us$uZzB^lY)dqcj=!>BJSTqnksoa+o 
zf{<-%oM|vLX$4n%UuoWwAwHw4D0dpg4Puw>sXopFn$^i{sFt(i*x(gCK{uWPZ_`xV zT<$R7gKHt#%H}%#1Zu*xHQ#Nlo;wO`-w1jvJ5vOkm9-&ykeEPyuHF4+3738dwJrKA zX&y+=?E>1+X4gQgDe7FAJ%>$u(K6^=)?C04Xry5dMq^x&(#kk7g+i+l0(tyc zDT=YVWDw@5Qe~b9?L}s+SP}w3`*0W1FMwLLb%K08AZXHdh!=VdxxQvRDfHr)-8ku?Ag zfjE%pv<>9bD5u5th#qweJksHy2UP*=ff9IH0*xqV?z(h-k+Vk%o759909pg5*c(rX z=4%oe$BCGwAZTq=1^tASbl3xk323DlcZ%q9JFJf z^q`K6R7e}8NJ%5F<~^APd+rAqh?48t2WBn6I?x?0#YGGEif|e(g7ld|bwdy6!6zEV z&7EezIMejUzu^!Gpf*Z(db9B+Xe5QvkIAq3JW|v_QIpqQpdE0$h9n){B-JPNagYoN z)MpG22}L?VP7Pt+I$ej#oV4a2uW;LzAB(al3bm>$sJv@?E_Hg?b= zX!Ldrj(i8O7kxs`L7QPu>|9V76hCXq%jsyXp+?bMRAPerr)-XH`s+9fIpfHJVzI#H zs=rbpO&DIf%+{uwqP_WlEL@F#Bs#p?YA$;h0 z427p>gyhWu#OMezkAs4DFf-U=1P;k7z6)HpykJDT$UkfXX@IiMp7?RFxX5id%=IE+ ziq!_`kJif|i$(9uiX(ajTgO{yV2)PCj1|{t4F*Cb1G5d?MxkYTZ3~Y)a1Lw;^O01s ztCJ2hN6Q;Mtxm5`6Xc7v0|gWg8g{6}B>Ek4g)-H;pjymDB-#|H1x;;n!jm|YSVhw~ zgGFUxY3Ui55G-a9^!usZVTkCzg;~+I*mMLxSn^5l=nC7BC-s~+s_l}(u0uMO3Wv>_ zB#)(P0)EXHLafQoVNED5g&1jPqZLlEccMZGKy4k2lRz4CpD==|KEBEIAVvo;M;L z?gbAF;zHYi2?l3ilo<$SqR9fdA;(UI!)E0jx5=`M*ij6iE9i#Q?jaz42CQQ`i)%^| z3KYTKQ^s3tX`LkSE*5+NmrH#b9r|mq3(<~<%ly1O95-}ep~Esz0-IC!bm(%F7vm#i z&S-uTXjZUfB!CYfTHQ%SZJ;tebwR$!wXE$rNe7%p*E`Gt%2GcOnRKA^e^a7}hG9aG z8f>6KLoz$?%BbbAemqBzIO*mG+rJKtOEq8IX%LNoYkN!u50FfD3AQiK`p45OaTh=y+rqS8;>YAEiX~G2s-pZ%E;4o?Vx-Vn7)u3x1ed# zMh%1uLvvw*nxfjqPaRlx6hCOItb-^k0yG)WjS*8*-?61fpFs_@oJ3`BPgI6iNcAQ% zY~*|{Y4o@?1jv^AKowYuu!+;LXTjPpALU#y8&4S0?qrU2CU)ixrG&i3Y9p9S5U`<0 zO@gR^lQ`(;2CEx7$E0BONRHq})S%Aq5E)IL3Pn;&hQ@aAQbA<}lp)h7Vp5<)l^9{# zbq+@-zuQPO`kH#^1VIO(8B4So1K#SMBD!PN(Xm=vPZSR#2UX~!J35S?^wz)(2CyFo z3z*U!k$zjNL#ueTASEaEYx3g-KR&%;3G#|=R|G~ty&|(`Xg&|kY67;Uc26vuh%rN# zzhEH<*+HdcK8ERzCqt;Wm7@cxwhpIL={itN=<$eIjyPK#h{vL*g#eDYoPGyGO(;c~ z;J7DEsG|vJ%w3>?zQR}~1U~W(S%)vATXd8{pQC1EgVRul$nGjPgBmN8BH)mFV+YM~ z(|t4PjG4No-sw)-cMkOG)p;TWOIfv~Lsg02{!Y3m-URnh?zCl_&b-cu^+1lg7@6G? 
zqo9Yd2!c5Eh=iOV+|qz|5vBFUefhPfxWyHqbfV21W&xL_=Nnze!NT;AKOKP1XSE~} z&_~eItTX`rZ{|^S6n`X2SuvVtRQJ4*2)08nP%S+IF&90&Mfa}|$H_W-g6S3(6ihzK z70y)WmGwO}(IJBKsxumnHp2E6BtAH+{)Aiict`7>Cm-*C^KN-66ZpZ_I%kq6uW*=S zIJ6w zVU?~yT>*$(0aU)dDj(%@?w4Fv)UOWD(TH)rgk=G$aJplmb?~n9M_f1RbO5sG7%sO^`K`NvY13%Uw=5me>fv6o(d?^ngHUe zK1_zfkS5O(nqfGa8)ss)4lNI49t?T`5#Ql@+kf%5?LE~%^5YtWfs|jKz~{*Bv9jbD z^$~0WJm$d>g1=--xx8p7%9$1vKAiXeI3zjQiN%mT92MnXb$&?TbW)WQB0K3(Y(`Sl zjq0B7k>uxi}YIF(!%oeEqvNy4^5n4t0y5HqD0XyR_r|6}PPxnKY$j z5QWiD$b~1QOvfvvOs7{$nGQTIQu-18M~OTxT;>ssCxuHZlLn6!HfI-*&?TC|Lh$`u z&YoQ>wLSxp4R~9x6cJ4WJ~YuIQOH22l>gKbr6J*iqUwP?6F$(nBN{WNRp-B8WOk&K z!L%h=#p8o4zvbzb_TMlP`G-7={ppfF68hy-M)f{o`ZpiLVH%AvJ>k6AAr;Ija}zQA zi)Z-sXbEV}*614`G9)JDlFs8}wKl7_Su%|Ij75$&1|jMe8*s%7Pr0DeFfleMtg7g>#T){2 z;gA%J6;@bPxeZ3-D%K}<8L%K2L|QQZ@d1lim5sV$y1`6x8A}JO&)CeEO>Pv74wzZ7 zSJ~h+C|K1K(<} zT#kb81jCgUOvA^&Yv>1(n4(L&nD+F542R#?Nbo>xu*M;VZs~rpB|Wj#ySKrHB)PyT10#{-JhphD(X=LavqnB%qF^0Sn^}yQoXyDKdBG%3lBW*~ zHf7l+9T{_iNk}-g@7Ya)g)Cdp6Jvv55@6H~@?1j%8?$Vq4vbmB#4gXA3>+`Pe3s4Y zkuf8fnD3rr$@NXJA{$C2V_Gm44*DPlST`{Cp~%Limt!QDh7oXa*a+VDj9nMmRQsTg zyHI-B9alxR4W^b7s^FC+mAj=D))J7Kj|J1U7Owwm&al4LLba49e}b8IG!Kj?n58Yn z#q&%|wOBpUK{lmY>}0GyU)S!3+6hPO^V+?xo$-i$T)S80{SWUsbtfDAwh90M N002ovPDHLkV1h*w`(^+D literal 0 HcmV?d00001 diff --git a/assets/microsoft.sixel b/assets/microsoft.sixel new file mode 100644 index 0000000..bf53919 --- /dev/null +++ b/assets/microsoft.sixel @@ -0,0 +1 @@ 
+P;1q"1;1;300;60#0;2;100;100;100#0!42?_ow{}!12?_ow{}!6?_ow{}}!5?_ow{{}}}!17~^NFbpw{}!8~!4}{wwo_!12?_oow{{{!4}!6~!4}{{wwo__!4?_ow{{}}}!23~^Nfrxw{{}}}!9~!4}{{woo_!12?_ow{}!15~^NFbpw{}!17~^NFB@-!36?_ow{}!6~!6?_ow{}!6~??w{}!7~?o{}!10~^^!10NFBpw{}!6~!8N^!9~{_!4?_o{}!8~^^!9N^^!9~{w}!8~^!18NFbx{}!9~^^!8N^^!9~}{o???ow{}!6~!11NFB@GKM!5N!10~!4NFB@-!30?_ow{}!12~_ow{}!12~??!20~FB@!15?!10~!10?r!9~???{!8~NB@!15?@FN!16~!4{!4wooo__!5?_}!8~^FB!16?@F^!8~{o!10~!9o!13?!10~-!24?_ow{}!35~??!19~x!18?!10~?CK[!4{}!9~^B??N!8~x!21?!10~N^^!18~}{o!10~!22?!29~!13?!10~-!18?_ow{}!8~^NFB@?!11~^NFB@?!10~??!10~F!9~}{wo__!12?!10~!5?@BFN^!9~}{wof^!7~}wo__!11?__o{!9~N@!7?!6@Bb!10~N!9~{o__!12?__o{}!8~F@!10~!9B!13?!10~-!12?_ow{}!8~^NFB@!7?!5~^NFB@!7?!10~??!10~??@FN^!20~??!10~!11?@BFN^!23~!7}!10~^NFB~!12}!12~^NB??BFN^!9~!10}!9~^NF@???!10~!22?!5~^NFB@-!6?_ow{}!8~^NFB@!13?FFB@!13?!10F??!10F!7?@@BB!15F??!10F!17?@BFN^!10~|zrfFF!10NFFFBB@@!5?!21FBB@!11?@BBFFNNN!10^NNNFFBB@!8?!10~!22?NFB@-_ow{}!8~^NFB@!119?@BFN^!9~}{wo!88?!10~-!7~^NFB@!131?@BFN^!7~!88?!7~^NF-~^NFB@!143?@BFN^~!88?~^NFB@\ diff --git a/build.rs b/build.rs new file mode 100644 index 0000000..ff0378d --- /dev/null +++ b/build.rs @@ -0,0 +1,12 @@ +fn main() { + if std::env::var("CARGO_CFG_TARGET_OS").unwrap_or_default() == "windows" + && std::env::var("CARGO_CFG_TARGET_ENV").unwrap_or_default() == "msvc" + { + let path = std::path::absolute("src/edit.exe.manifest").unwrap(); + let path = path.to_str().unwrap(); + println!("cargo::rerun-if-changed=src/edit.exe.manifest"); + println!("cargo::rustc-link-arg-bin=edit=/MANIFEST:EMBED"); + println!("cargo::rustc-link-arg-bin=edit=/MANIFESTINPUT:{}", path); + println!("cargo::rustc-link-arg-bin=edit=/WX"); + } +} diff --git a/src/apperr.rs b/src/apperr.rs new file mode 100644 index 0000000..60e98c4 --- /dev/null +++ b/src/apperr.rs @@ -0,0 +1,80 @@ +use crate::sys; +use std::num::NonZeroU32; +use std::{fmt, io, result}; + +// Remember to add an entry to `Error::message()` for each new error. 
+pub const APP_ICU_MISSING: Error = Error::new_app(1); +pub const APP_FILE_NOT_FOUND: Error = Error::new_app(2); + +pub type Result = result::Result; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct Error(NonZeroU32); + +impl Error { + const FLAGS_MASK: u32 = 0xF8000000; // Top 5 bits + const FLAGS_CUSTOM_FAILURE: u32 = 0xA0000000; + + const TAG_APP: u32 = Self::FLAGS_CUSTOM_FAILURE | (1 << 16); + const TAG_ICU: u32 = Self::FLAGS_CUSTOM_FAILURE | (2 << 16); + + pub const unsafe fn new(code: u32) -> Self { + Error(unsafe { NonZeroU32::new_unchecked(code) }) + } + + pub const fn new_app(code: u32) -> Self { + debug_assert!(code > 0 && code <= 0xFFFF); + unsafe { Self::new(Self::TAG_APP | code) } + } + + pub const fn new_icu(code: u32) -> Self { + debug_assert!(code > 0 && code <= 0xFFFF); + unsafe { Self::new(Self::TAG_ICU | code) } + } + + pub fn is_app(&self) -> bool { + (self.0.get() & 0xFFFF0000) == Self::TAG_APP + } + + pub fn is_icu(&self) -> bool { + (self.0.get() & 0xFFFF0000) == Self::TAG_ICU + } + + pub fn code(&self) -> u32 { + self.0.get() & 0xFFFF + } + + pub fn value(&self) -> u32 { + self.0.get() + } + + pub fn message(self) -> String { + match self { + APP_ICU_MISSING => "ICU not found".to_string(), + APP_FILE_NOT_FOUND => "File not found".to_string(), + _ => { + debug_assert!(!self.is_app()); + if self.is_icu() { + format!("ICU Error {:#08x}", self.code()) + } else { + sys::format_error(self) + } + } + } + } +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{:#08x}", self.0) + } +} + +impl From for Error { + fn from(err: io::Error) -> Self { + match err.kind() { + io::ErrorKind::NotFound => APP_FILE_NOT_FOUND, + _ => sys::io_error_to_apperr(err), + } + } +} diff --git a/src/buffer.rs b/src/buffer.rs new file mode 100644 index 0000000..e95016e --- /dev/null +++ b/src/buffer.rs @@ -0,0 +1,2299 @@ +//! Implements a Unicode-aware, layout-aware text buffer for terminals. +//! 
It's based on a gap buffer. It has no line cache and instead relies +//! on the performance of the ucd module for fast text navigation. +//! +//! If the project ever outgrows a basic gap buffer (e.g. to add time travel) +//! an ideal, alternative architecture would be a piece table with immutable trees. +//! The tree nodes can be allocated on the same arena allocator as the added chunks, +//! making lifetime management fairly easy. The algorithm is described here: +//! * https://cdacamar.github.io/data%20structures/algorithms/benchmarking/text%20editors/c++/editor-data-structures/ +//! * https://github.com/cdacamar/fredbuf +//! +//! The downside is that text navigation & search takes a performance hit due to small chunks. +//! The solution to the former is to keep line caches, which further complicates the architecture. +//! There's no solution for the latter. However, there's a chance that the performance will still be sufficient. + +use crate::framebuffer::{Framebuffer, IndexedColor}; +use crate::helpers::{self, COORD_TYPE_SAFE_MAX, CoordType, Point, Rect}; +use crate::memchr::memchr2; +use crate::ucd::Document; +use crate::{apperr, icu, sys, trust_me_bro, ucd}; +use std::borrow::Cow; +use std::cell::UnsafeCell; +use std::collections::LinkedList; +use std::fmt::Write as _; +use std::fs::File; +use std::io::Read as _; +use std::io::Write as _; +use std::mem::MaybeUninit; +use std::ops::{Deref, DerefMut}; +use std::path::Path; +use std::rc::Rc; +use std::{mem, ptr, slice, str}; + +/// The margin template is used for line numbers. +/// The max. line number we should ever expect is probably 64-bit, +/// and so this template fits 19 digits, followed by " │ ". +const MARGIN_TEMPLATE: &str = " │ "; +/// Just a bunch of whitespace you can use for turning tabs into spaces. +/// Happens to reuse MARGIN_TEMPLATE, because it has sufficient whitespace. 
+const TAB_WHITESPACE: &str = MARGIN_TEMPLATE; + +#[derive(Copy, Clone)] +pub struct TextBufferStatistics { + logical_lines: CoordType, + visual_lines: CoordType, +} + +#[derive(Copy, Clone)] +enum TextBufferSelection { + None, + Active { beg: Point, end: Point }, + Done { beg: Point, end: Point }, +} +impl TextBufferSelection { + fn is_some(&self) -> bool { + !matches!(self, Self::None) + } +} + +#[derive(Copy, Clone, Eq, PartialEq)] +enum HistoryType { + None, + CursorMovement, + Write, + Delete, +} + +struct HistoryEntry { + /// Logical cursor position before the change was made. + cursor_before: Point, + selection_before: TextBufferSelection, + stats_before: TextBufferStatistics, + generation_before: u32, + /// Logical cursor position where the change took place. + /// The position is at the start of the changed range. + cursor: Point, + /// Text that was deleted from the buffer. + deleted: Vec, + /// Text that was added to the buffer. + added: Vec, +} + +struct ActiveSearch { + pattern: String, + options: SearchOptions, + text: icu::Text, + regex: icu::Regex, + at_start: bool, + no_matches: bool, +} + +#[derive(Default, Clone, Copy, Eq, PartialEq)] +pub struct SearchOptions { + pub match_case: bool, + pub whole_word: bool, + pub use_regex: bool, +} + +/// Caches the start and length of the active edit line for a single edit. +/// This helps us avoid having to remeasure the buffer after an edit. +struct ActiveEditLineInfo { + /// Points to the start of the currently being edited line. 
+ safe_start: ucd::UcdCursor, + line_height_in_rows: CoordType, + distance_next_line_start: usize, +} + +pub enum CursorMovement { + Grapheme, + Word, +} + +#[derive(Clone)] +pub struct RcTextBuffer(Rc>); + +impl RcTextBuffer { + pub fn new(small: bool) -> apperr::Result { + let tb = TextBuffer::new(small)?; + Ok(Self(Rc::new(UnsafeCell::new(tb)))) + } +} + +impl Deref for RcTextBuffer { + type Target = TextBuffer; + + fn deref(&self) -> &Self::Target { + unsafe { &*self.0.get() } + } +} + +impl DerefMut for RcTextBuffer { + fn deref_mut(&mut self) -> &mut Self::Target { + unsafe { &mut *self.0.get() } + } +} + +pub struct TextBuffer { + buffer: GapBuffer, + + undo_stack: LinkedList, + redo_stack: LinkedList, + last_history_type: HistoryType, + last_save_generation: u32, + + active_edit_line_info: Option, + active_edit_depth: i32, + active_edit_off: usize, + + stats: TextBufferStatistics, + cursor: ucd::UcdCursor, + // When scrolling significant amounts of text away from the cursor, + // rendering will naturally slow down proportionally to the distance. + // To avoid this, we cache the cursor position for rendering. + // Must be cleared on every edit or reflow. 
+ cursor_for_rendering: Option, + selection: TextBufferSelection, + search: Option, + + width: CoordType, + margin_width: CoordType, + margin_enabled: bool, + word_wrap_column: CoordType, + word_wrap_enabled: bool, + tab_size: CoordType, + indent_with_tabs: bool, + ruler: CoordType, + encoding: &'static str, + newlines_are_crlf: bool, + overtype: bool, + + wants_cursor_visibility: bool, +} + +impl TextBuffer { + pub fn new(small: bool) -> apperr::Result { + Ok(Self { + buffer: GapBuffer::new(small)?, + + undo_stack: LinkedList::new(), + redo_stack: LinkedList::new(), + last_history_type: HistoryType::None, + last_save_generation: 0, + + active_edit_line_info: None, + active_edit_depth: 0, + active_edit_off: 0, + + stats: TextBufferStatistics { + logical_lines: 1, + visual_lines: 1, + }, + cursor: ucd::UcdCursor::default(), + cursor_for_rendering: None, + selection: TextBufferSelection::None, + search: None, + + width: 0, + margin_width: 0, + margin_enabled: false, + word_wrap_column: CoordType::MAX, + word_wrap_enabled: false, + tab_size: 4, + indent_with_tabs: false, + ruler: CoordType::MAX, + encoding: "UTF-8", + newlines_are_crlf: false, + overtype: false, + + wants_cursor_visibility: false, + }) + } + + pub fn text_length(&self) -> usize { + self.buffer.len() + } + + pub fn is_dirty(&self) -> bool { + self.last_save_generation != self.buffer.generation + } + + pub fn mark_as_dirty(&mut self) { + self.last_save_generation = self.buffer.generation.wrapping_sub(1); + } + + fn mark_as_clean(&mut self) { + self.last_save_generation = self.buffer.generation; + } + + pub fn encoding(&self) -> &'static str { + self.encoding + } + + pub fn is_crlf(&self) -> bool { + self.newlines_are_crlf + } + + pub fn normalize_newlines(&mut self, crlf: bool) { + let newline: &[u8] = if crlf { b"\r\n" } else { b"\n" }; + let mut off = 0; + + let mut cursor_offset = self.cursor.offset; + let mut cursor_for_rendering_offset = self + .cursor_for_rendering + .map_or(cursor_offset, |c| 
c.offset); + + #[cfg(debug_assertions)] + let mut adjusted_newlines = 0; + + 'outer: loop { + // Seek to the offset of the next line start. + loop { + let chunk = self.read_forward(off); + if chunk.is_empty() { + break 'outer; + } + + let (delta, line) = ucd::newlines_forward(chunk, 0, 0, 1); + off += delta; + if line == 1 { + break; + } + } + + // Get the preceding newline. + let chunk = self.read_backward(off); + let chunk_newline_len = if chunk.ends_with(b"\r\n") { 2 } else { 1 }; + let chunk_newline = &chunk[chunk.len() - chunk_newline_len..]; + + if chunk_newline != newline { + // If this newline is still before our cursor position, then it still has an effect on its offset. + // Any newline adjustments past that cursor position are irrelevant. + let delta = newline.len() as isize - chunk_newline_len as isize; + if off <= cursor_offset { + cursor_offset = cursor_offset.saturating_add_signed(delta); + #[cfg(debug_assertions)] + { + adjusted_newlines += 1; + } + } + if off <= cursor_for_rendering_offset { + cursor_for_rendering_offset = + cursor_for_rendering_offset.saturating_add_signed(delta); + } + + // Replace the newline. + off -= chunk_newline_len; + let gap = self + .buffer + .allocate_gap(off, newline.len(), chunk_newline_len); + gap.copy_from_slice(newline); + self.buffer.commit_gap(newline.len()); + off += newline.len(); + } + } + + // If this fails, the cursor offset calculation above is wrong. 
+ #[cfg(debug_assertions)] + debug_assert_eq!(adjusted_newlines, self.cursor.logical_pos.y); + + self.cursor.offset = cursor_offset; + if let Some(cursor) = &mut self.cursor_for_rendering { + cursor.offset = cursor_for_rendering_offset; + } + + self.newlines_are_crlf = crlf; + } + + pub fn is_overtype(&self) -> bool { + self.overtype + } + + pub fn set_overtype(&mut self, overtype: bool) { + self.overtype = overtype; + } + + pub fn get_logical_line_count(&self) -> CoordType { + self.stats.logical_lines + } + + pub fn get_visual_line_count(&self) -> CoordType { + self.stats.visual_lines + } + + pub fn get_cursor_logical_pos(&self) -> Point { + self.cursor.logical_pos + } + + pub fn get_cursor_visual_pos(&self) -> Point { + self.cursor.visual_pos + } + + pub fn get_margin_width(&self) -> CoordType { + self.margin_width + } + + pub fn get_text_width(&self) -> CoordType { + self.width - self.margin_width + } + + pub fn is_word_wrap_enabled(&self) -> bool { + self.word_wrap_enabled + } + + pub fn make_cursor_visible(&mut self) { + self.wants_cursor_visibility = true; + } + + pub fn take_cursor_visibility_request(&mut self) -> bool { + mem::take(&mut self.wants_cursor_visibility) + } + + // NOTE: It's expected that the tui code calls `set_width()` sometime after this. + // This will then trigger the actual recalculation of the cursor position. + pub fn toggle_word_wrap(&mut self) { + self.word_wrap_enabled = !self.word_wrap_enabled; + self.width = 0; // Force a reflow. 
+ self.make_cursor_visible(); + } + + pub fn set_width(&mut self, width: CoordType) -> bool { + if width <= 0 || width == self.width { + return false; + } + + self.width = width; + self.reflow(true); + true + } + + pub fn set_margin_enabled(&mut self, enabled: bool) -> bool { + if self.margin_enabled == enabled { + return false; + } + + self.margin_enabled = enabled; + self.reflow(true); + true + } + + pub fn tab_size(&self) -> CoordType { + self.tab_size + } + + pub fn set_tab_size(&mut self, width: CoordType) -> bool { + if width <= 0 || width == self.tab_size { + return false; + } + + self.tab_size = width; + self.reflow(true); + true + } + + pub fn set_ruler(&mut self, column: Option) { + self.ruler = column.unwrap_or(CoordType::MAX); + } + + fn reflow(&mut self, force: bool) { + // +1 onto logical_lines, because line numbers are 1-based. + // +1 onto log10, because we want the digit width and not the actual log10. + // +3 onto log10, because we append " | " to the line numbers to form the margin. + self.margin_width = if self.margin_enabled { + self.stats.logical_lines.ilog10() as CoordType + 4 + } else { + 0 + }; + + let word_wrap_column = if self.word_wrap_enabled { + self.get_text_width() + } else { + CoordType::MAX + }; + + if force || self.word_wrap_column != word_wrap_column { + self.word_wrap_column = word_wrap_column; + + if self.cursor.offset != 0 { + self.cursor = self.cursor_move_to_logical_internal( + ucd::UcdCursor::default(), + self.cursor.logical_pos, + ); + } + + // Recalculate the line statistics. 
+ if self.word_wrap_enabled { + let end = self.cursor_move_to_logical_internal(self.cursor, Point::MAX); + self.stats.visual_lines = end.visual_pos.y + 1; + } else { + self.stats.visual_lines = self.stats.logical_lines; + } + } + + self.cursor_for_rendering = None; + } + + pub fn indent_with_tabs(&self) -> bool { + self.indent_with_tabs + } + + pub fn set_indent_with_tabs(&mut self, indent_with_tabs: bool) { + self.indent_with_tabs = indent_with_tabs; + } + + pub fn set_encoding(&mut self, encoding: &'static str) { + self.encoding = encoding; + self.buffer.generation = self.buffer.generation.wrapping_add(1); + } + + /// Replaces the entire buffer contents with the given `text`. + /// Assumes that the line count doesn't change. + pub fn copy_from_str(&mut self, text: &str) { + if self.buffer.copy_from_str(text) { + self.recalc_after_content_swap(); + self.cursor_move_to_logical(Point::MAX); + } + } + + pub fn debug_replace_everything(&mut self, text: &str) { + if self.buffer.copy_from_str(text) { + let before = self.cursor.logical_pos; + let end = self.cursor_move_to_logical_internal( + ucd::UcdCursor::default(), + Point { + x: 0, + y: CoordType::MAX, + }, + ); + self.stats.logical_lines = end.logical_pos.y + 1; + self.stats.visual_lines = self.stats.logical_lines; + self.recalc_after_content_swap(); + self.cursor_move_to_logical(before); + } + } + + fn recalc_after_content_swap(&mut self) { + // If the buffer was changed, nothing we previously saved can be relied upon. + self.undo_stack.clear(); + self.redo_stack.clear(); + self.last_history_type = HistoryType::None; + self.cursor = ucd::UcdCursor::default(); + self.cursor_for_rendering = None; + self.selection = TextBufferSelection::None; + self.search = None; + self.mark_as_clean(); + self.reflow(true); + } + + /// Copies the contents of the buffer into a string. 
+ pub fn save_as_string(&mut self, dst: &mut String) { + self.buffer.copy_into_string(dst); + self.mark_as_clean(); + } + + /// Reads a file from disk into the text buffer, detecting encoding and BOM. + pub fn read_file( + &mut self, + file: &mut File, + encoding: Option<&'static str>, + ) -> apperr::Result<()> { + let mut read = 0; + + #[allow(invalid_value)] + let mut buf: [u8; 4096] = unsafe { MaybeUninit::uninit().assume_init() }; + let mut first_chunk_len = 0; + + // Read enough bytes to detect the BOM. + while first_chunk_len < BOM_MAX_LEN { + read = file.read(&mut buf[first_chunk_len..])?; + if read == 0 { + break; + } + first_chunk_len += read; + } + + if let Some(encoding) = encoding { + self.encoding = encoding; + } else { + let bom = detect_bom(&buf[..first_chunk_len]); + self.encoding = bom.unwrap_or("UTF-8"); + } + + // TODO: Since reading the file can fail, we should ensure that we also reset the cursor here. + // I don't do it, so that `recalc_after_content_swap()` works. + self.buffer.clear(); + + let done = read == 0; + if self.encoding == "UTF-8" { + self.read_file_as_utf8(file, &mut buf, first_chunk_len, done)?; + } else { + self.read_file_with_icu(file, &mut buf, first_chunk_len, done)?; + } + + // Figure out + // * the logical line count + // * the newline type (LF or CRLF) + // * the indentation type (tabs or spaces) + { + let chunk = self.read_forward(0); + let mut offset = 0; + let mut lines = 0; + // Number of lines ending in CRLF. + let mut crlf_count = 0; + // Number of lines starting with a tab. + let mut tab_indentations = 0; + // Number of lines starting with a space. + let mut space_indentations = 0; + // Histogram of the indentation depth of lines starting with between 2 and 8 spaces. + // In other words, `space_indentation_sizes[0]` is the number of lines starting with 2 spaces. 
+ let mut space_indentation_sizes = [0; 7]; + + loop { + (offset, lines) = ucd::newlines_forward(chunk, offset, lines, lines + 1); + assert!(offset <= chunk.len()); + + // Check if the preceding line ended in CRLF. + if offset >= 2 && &chunk[offset - 2..offset] == b"\r\n" { + crlf_count += 1; + } + + // Check if the line starts with a tab. + if offset < chunk.len() && chunk[offset] == b'\t' { + tab_indentations += 1; + } else { + // Otherwise, check how many spaces the line starts with. Searching for >8 spaces + // allows us to reject lines that have more than 1 level of indentation. + let space_indentation = chunk[offset..] + .iter() + .take(9) + .take_while(|&&c| c == b' ') + .count(); + // We'll also reject lines starting with 1 space, because that's too fickle as a heuristic. + if (2..=8).contains(&space_indentation) { + space_indentations += 1; + space_indentation_sizes[space_indentation - 2] += 1; + } + } + + // We'll limit our heuristics to the first 100 lines. + // That should hopefully be enough in practice. + if offset >= chunk.len() || lines >= 100 { + break; + } + } + + // We'll assume CRLF if more than half of the lines end in CRLF. + let newlines_are_crlf = crlf_count >= lines / 2; + + // We'll assume tabs if there are more lines starting with tabs than with spaces. + let indent_with_tabs = tab_indentations > space_indentations; + let tab_size = if indent_with_tabs { + // Tabs will get a visual size of 4 spaces by default. + 4 + } else { + // Otherwise, we'll assume the most common indentation depth. + // We can't use `max_by_key`, because that will return the largest index and we + // want the smallest (= prefer 2 over 4 over 8 if they're all equally common). + let mut max = 0; + let mut tab_size = 4; + for (i, &count) in space_indentation_sizes.iter().enumerate() { + if count > max { + max = count; + tab_size = i as CoordType + 2; + } + } + tab_size + }; + + // If the file has more than 100 lines, figure out how many are remaining. 
+ if offset < chunk.len() { + (_, lines) = ucd::newlines_forward(chunk, offset, lines, CoordType::MAX); + } + + // Add 1, because the last line doesn't end in a newline (it ends in the literal end). + self.stats.logical_lines = lines + 1; + self.stats.visual_lines = self.stats.logical_lines; + self.newlines_are_crlf = newlines_are_crlf; + self.indent_with_tabs = indent_with_tabs; + self.tab_size = tab_size; + } + + self.recalc_after_content_swap(); + Ok(()) + } + + fn read_file_as_utf8( + &mut self, + file: &mut File, + buf: &mut [u8], + first_chunk_len: usize, + done: bool, + ) -> apperr::Result<()> { + { + let mut first_chunk = &buf[..first_chunk_len]; + if first_chunk.starts_with(b"\xEF\xBB\xBF") { + first_chunk = &first_chunk[3..]; + self.encoding = "UTF-8 BOM"; + } + + let gap = self.buffer.allocate_gap(0, first_chunk.len(), 0); + gap.copy_from_slice(first_chunk); + self.buffer.commit_gap(first_chunk.len()); + } + + if done { + return Ok(()); + } + + // If we don't have file metadata, the input may be a pipe or a socket. + // Every read will have the same size until we hit the end. + let mut chunk_size = 64 * 1024; + let mut extra_chunk_size = 8 * 1024; + + if let Ok(m) = file.metadata() { + // Usually the next read of size `chunk_size` will read the entire file, + // but if the size has changed for some reason, then `extra_chunk_size` + // should be large enough to read the rest of the file. + // 4KiB is not too large and not too slow. 
+ let len = m.len() as usize; + chunk_size = len.saturating_sub(first_chunk_len); + extra_chunk_size = 4096; + } + + loop { + let gap = self.buffer.allocate_gap(self.text_length(), chunk_size, 0); + + let read = file.read(gap)?; + if read == 0 { + break; + } + + self.buffer.commit_gap(read); + chunk_size = extra_chunk_size; + } + + Ok(()) + } + + fn read_file_with_icu( + &mut self, + file: &mut File, + buf: &mut [u8], + first_chunk_len: usize, + mut done: bool, + ) -> apperr::Result<()> { + let mut pivot_buffer = [const { MaybeUninit::::uninit() }; 4096]; + + let mut c = icu::Converter::new(&mut pivot_buffer, self.encoding, "UTF-8")?; + + let mut first_chunk = &buf[..first_chunk_len]; + while !first_chunk.is_empty() { + let off = self.text_length(); + let gap = self.buffer.allocate_gap(off, 8 * 1024, 0); + let (input_advance, mut output_advance) = c.convert(first_chunk, gap)?; + + // Remove the BOM from the file, if this is the first chunk. + // Our caller ensures to only call us once the BOM has been identified, + // which means that if there's a BOM it must be wholly contained in this chunk. + if off == 0 { + let written = &mut gap[..output_advance]; + if written.starts_with(b"\xEF\xBB\xBF") { + written.copy_within(3.., 0); + output_advance -= 3; + } + } + + self.buffer.commit_gap(output_advance); + first_chunk = &first_chunk[input_advance..]; + } + + let mut buf_len = 0; + + loop { + if !done { + let read = file.read(&mut buf[buf_len..])?; + buf_len += read; + done = read == 0; + } + + let flush = done && buf_len == 0; + let gap = self.buffer.allocate_gap(self.text_length(), 8 * 1024, 0); + let (input_advance, output_advance) = c.convert(&buf[..buf_len], gap)?; + + self.buffer.commit_gap(output_advance); + buf_len -= input_advance; + buf.copy_within(input_advance.., 0); + + if flush { + break; + } + } + + Ok(()) + } + + /// Writes the text buffer contents to a file, handling BOM and encoding. 
+ pub fn write_file(&mut self, path: &Path) -> apperr::Result<()> { + // TODO: Write to a temp file and do an atomic rename. + let mut file = File::create(path)?; + let mut offset = 0; + + if self.encoding.starts_with("UTF-8") { + if self.encoding == "UTF-8 BOM" { + file.write_all(b"\xEF\xBB\xBF")?; + } + loop { + let chunk = self.read_forward(offset); + if chunk.is_empty() { + break; + } + file.write_all(chunk)?; + offset += chunk.len(); + } + } else { + self.write_file_with_icu(file)?; + } + + self.mark_as_clean(); + Ok(()) + } + + fn write_file_with_icu(&mut self, mut file: File) -> apperr::Result<()> { + let mut pivot_buffer = [const { MaybeUninit::::uninit() }; 4096]; + #[allow(invalid_value)] + let mut buf: [u8; 4096] = unsafe { MaybeUninit::uninit().assume_init() }; + let mut c = icu::Converter::new(&mut pivot_buffer, "UTF-8", self.encoding)?; + let mut offset = 0; + + // Write the BOM for the encodings we know need it. + if self.encoding.starts_with("UTF-16") + || self.encoding.starts_with("UTF-32") + || self.encoding == "gb18030" + { + let (_, output_advance) = c.convert(b"\xEF\xBB\xBF", &mut buf)?; + file.write_all(&buf[..output_advance])?; + } + + loop { + let chunk = self.read_forward(offset); + if chunk.is_empty() { + break; + } + + let (input_advance, output_advance) = c.convert(chunk, &mut buf)?; + file.write_all(&buf[..output_advance])?; + offset += input_advance; + } + + Ok(()) + } + + pub fn selection_update_visual(&mut self, visual_pos: Point) { + let cursor = self.cursor; + self.set_cursor_for_selection(self.cursor_move_to_visual_internal(cursor, visual_pos)); + + match &mut self.selection { + TextBufferSelection::None | TextBufferSelection::Done { .. 
} => { + self.selection = TextBufferSelection::Active { + beg: cursor.logical_pos, + end: self.cursor.logical_pos, + }; + } + TextBufferSelection::Active { beg: _, end } => { + *end = self.cursor.logical_pos; + } + } + } + + pub fn selection_update_logical(&mut self, logical_pos: Point) { + let cursor = self.cursor; + self.set_cursor_for_selection(self.cursor_move_to_logical_internal(cursor, logical_pos)); + + match &mut self.selection { + TextBufferSelection::None | TextBufferSelection::Done { .. } => { + self.selection = TextBufferSelection::Active { + beg: cursor.logical_pos, + end: self.cursor.logical_pos, + }; + } + TextBufferSelection::Active { beg: _, end } => { + *end = self.cursor.logical_pos; + } + } + } + + pub fn selection_update_delta(&mut self, granularity: CursorMovement, delta: CoordType) { + let cursor = self.cursor; + self.set_cursor_for_selection(self.cursor_move_delta_internal(cursor, granularity, delta)); + + match &mut self.selection { + TextBufferSelection::None | TextBufferSelection::Done { .. } => { + self.selection = TextBufferSelection::Active { + beg: cursor.logical_pos, + end: self.cursor.logical_pos, + }; + } + TextBufferSelection::Active { beg: _, end } => { + *end = self.cursor.logical_pos; + } + } + } + + pub fn select_word(&mut self) { + // TODO: Something is wrong about this. Try the string " in the" + // and double click on the "i" (= the cursor is in front of the "i"). + // It'll select "sup> in" but should only select 1 word of course. + // Not sure what the issue is, but I think this approach is wrong in general. 
+ let beg = self.cursor_move_delta_internal(self.cursor, CursorMovement::Word, -1); + let end = self.cursor_move_delta_internal(beg, CursorMovement::Word, 1); + self.set_cursor_for_selection(end); + self.selection = TextBufferSelection::Done { + beg: beg.logical_pos, + end: end.logical_pos, + }; + } + + pub fn selection_finalize(&mut self) { + if let TextBufferSelection::Active { beg, end } = self.selection { + self.selection = TextBufferSelection::Done { beg, end }; + } + } + + pub fn select_all(&mut self) { + let beg = ucd::UcdCursor::default(); + let end = self.cursor_move_to_logical_internal(beg, Point::MAX); + self.set_cursor_for_selection(end); + self.selection = TextBufferSelection::Done { + beg: beg.logical_pos, + end: end.logical_pos, + }; + } + + pub fn clear_selection(&mut self) -> bool { + let had_selection = self.selection.is_some(); + self.selection = TextBufferSelection::None; + had_selection + } + + pub fn find_and_select(&mut self, pattern: &str, options: SearchOptions) -> apperr::Result<()> { + if let Some(search) = &mut self.search { + if search.pattern != pattern || search.options != options { + self.search = None; + } + } + + if self.search.is_none() { + let mut pattern = Cow::Borrowed(pattern); + let mut flags = icu::Regex::MULTILINE; + + if !options.match_case { + flags |= icu::Regex::CASE_INSENSITIVE; + } + if options.whole_word { + pattern = Cow::Owned(format!(r"\b(?:{})\b", pattern)); + } + if !options.use_regex { + flags |= icu::Regex::LITERAL; + } + + let text = unsafe { icu::Text::new(self)? }; + let regex = unsafe { icu::Regex::new(&pattern, flags, &text)? 
}; + let mut search = ActiveSearch { + pattern: pattern.to_string(), + options, + text, + regex, + at_start: self.cursor.offset == 0, + no_matches: false, + }; + if self.cursor.offset != 0 { + search.regex.reset(self.cursor.offset); + } + self.search = Some(search); + } + + let search = self.search.as_mut().unwrap(); + if search.no_matches { + return Ok(()); + } + + // If we hit the end of the buffer, and we know that there's something to find, + // start the search again from the beginning (= wrap around). + let mut hit = search.regex.next(); + if hit.is_none() && !search.at_start { + search.at_start = true; + search.regex.reset(0); + hit = search.regex.next(); + } + + if let Some(range) = hit { + search.at_start = false; + + let beg = self.cursor_move_to_offset_internal(self.cursor, range.start); + let end = self.cursor_move_to_offset_internal(beg, range.end); + + self.set_cursor_internal(end); + self.make_cursor_visible(); + + self.selection = TextBufferSelection::Done { + beg: beg.logical_pos, + end: end.logical_pos, + }; + } else { + // Avoid searching through the entire document again if we know there's nothing to find. 
+ search.no_matches = true; + } + + Ok(()) + } + + fn measurement_config(&self) -> ucd::MeasurementConfig { + ucd::MeasurementConfig::new(&self.buffer) + .with_word_wrap_column(self.word_wrap_column) + .with_tab_size(self.tab_size) + } + + fn goto_line_start(&self, mut cursor: ucd::UcdCursor, y: CoordType) -> ucd::UcdCursor { + let cursor_before = cursor; + let mut seek_to_line_start = true; + + if y > cursor.logical_pos.y { + while y > cursor.logical_pos.y { + let chunk = self.read_forward(cursor.offset); + if chunk.is_empty() { + break; + } + + let (delta, line) = ucd::newlines_forward(chunk, 0, cursor.logical_pos.y, y); + cursor.offset += delta; + cursor.logical_pos.y = line; + } + + // If we're at the end of the buffer, we could either be there because the last + // character in the buffer is genuinely a newline, or because the buffer ends in a + // line of text without trailing newline. The only way to make sure is to seek + // backwards to the line start again. But otherwise we can skip that. 
+ seek_to_line_start = + cursor.offset == self.text_length() && cursor.offset != cursor_before.offset; + } + + if seek_to_line_start { + loop { + let chunk = self.read_backward(cursor.offset); + if chunk.is_empty() { + break; + } + + let (delta, line) = + ucd::newlines_backward(chunk, chunk.len(), cursor.logical_pos.y, y); + cursor.offset -= chunk.len() - delta; + cursor.logical_pos.y = line; + if delta > 0 { + break; + } + } + } + + if cursor.offset == cursor_before.offset { + return cursor; + } + + cursor.logical_pos.x = 0; + cursor.visual_pos.x = 0; + cursor.visual_pos.y = cursor.logical_pos.y; + cursor.column = 0; + + if self.word_wrap_column != CoordType::MAX { + let mut cursor_top = cursor_before; + let mut cursor_bottom = cursor; + let upward = cursor_top.offset > cursor_bottom.offset; + + if upward { + mem::swap(&mut cursor_top, &mut cursor_bottom); + } + + let cursor_end = self + .measurement_config() + .with_cursor(cursor_top) + .goto_logical(cursor_bottom.logical_pos); + + let mut delta = cursor_end.visual_pos.y - cursor_top.visual_pos.y; + if upward { + delta = -delta; + } + + cursor.visual_pos.y = cursor_before.visual_pos.y + delta; + } + + cursor + } + + fn cursor_move_to_offset_internal( + &self, + mut cursor: ucd::UcdCursor, + offset: usize, + ) -> ucd::UcdCursor { + while offset < cursor.offset { + cursor = self.cursor_move_to_logical_internal( + cursor, + Point { + x: 0, + y: cursor.logical_pos.y - 1, + }, + ); + } + + self.measurement_config() + .with_cursor(cursor) + .goto_offset(offset) + } + + fn cursor_move_to_logical_internal( + &self, + mut cursor: ucd::UcdCursor, + pos: Point, + ) -> ucd::UcdCursor { + let x = pos.x.max(0); + let y = pos.y.max(0); + + // goto_line_start() is very fast for seeking across lines. But we don't need that + // of course if the `y` didn't actually change. The only exception is if we're + // moving leftward in the same line as there's no read_backward() for that. 
+ if y != cursor.logical_pos.y || x < cursor.logical_pos.x { + cursor = self.goto_line_start(cursor, y); + } + + self.measurement_config() + .with_cursor(cursor) + .goto_logical(Point { x, y }) + } + + fn cursor_move_to_visual_internal( + &self, + mut cursor: ucd::UcdCursor, + pos: Point, + ) -> ucd::UcdCursor { + let x = pos.x.max(0); + let y = pos.y.max(0); + + if self.word_wrap_column == CoordType::MAX { + // goto_line_start() is very fast for seeking across lines. But we don't need that + // of course if the `y` didn't actually change. The only exception is if we're + // moving leftward in the same line as there's no read_backward() for that. + if y < cursor.visual_pos.y || (y == cursor.visual_pos.y && x < cursor.visual_pos.x) { + cursor = self.goto_line_start(cursor, y); + } + } else { + // Since we don't store how many visual lines each logical line spans, + // we have to iterate line by line and check each time if we're there yet. + // We can skip that for forward seeking, since measure_forward() will handle that. 
+ + if y < cursor.visual_pos.y || (y == cursor.visual_pos.y && x < cursor.visual_pos.x) { + cursor = self.goto_line_start(cursor, cursor.logical_pos.y); + } + + while y < cursor.visual_pos.y { + cursor = self.cursor_move_to_logical_internal( + cursor, + Point { + x: 0, + y: cursor.logical_pos.y - 1, + }, + ); + } + } + + self.measurement_config() + .with_cursor(cursor) + .goto_visual(Point { x, y }) + } + + fn cursor_move_delta_internal( + &self, + mut cursor: ucd::UcdCursor, + granularity: CursorMovement, + mut delta: CoordType, + ) -> ucd::UcdCursor { + if delta == 0 { + return cursor; + } + + let sign = if delta > 0 { 1 } else { -1 }; + + match granularity { + CursorMovement::Grapheme => { + let start_x = if delta > 0 { 0 } else { CoordType::MAX }; + + loop { + let target_x = cursor.logical_pos.x + delta; + + cursor = self.cursor_move_to_logical_internal( + cursor, + Point { + x: target_x, + y: cursor.logical_pos.y, + }, + ); + + // We can stop if we ran out of remaining delta + // (or perhaps ran past the goal; in either case the sign would've changed), + // or if we hit the beginning or end of the buffer. + delta = target_x - cursor.logical_pos.x; + if delta.signum() != sign + || (delta < 0 && cursor.offset == 0) + || (delta > 0 && cursor.offset >= self.text_length()) + { + break; + } + + cursor = self.cursor_move_to_logical_internal( + cursor, + Point { + x: start_x, + y: cursor.logical_pos.y + sign, + }, + ); + + // We crossed a newline which counts for 1 grapheme cluster. + // So, we also need to run the same check again. 
+ delta -= sign; + if delta.signum() != sign + || cursor.offset == 0 + || cursor.offset >= self.text_length() + { + break; + } + } + } + CursorMovement::Word => { + let doc = &self.buffer as &dyn Document; + let mut offset = self.cursor.offset; + + while delta != 0 { + if delta < 0 { + offset = ucd::word_backward(doc, offset); + } else { + offset = ucd::word_forward(doc, offset); + } + delta -= sign; + } + + cursor = self.cursor_move_to_offset_internal(cursor, offset); + } + } + + cursor + } + + fn cursor_move_to_offset(&mut self, offset: usize) { + self.set_cursor(self.cursor_move_to_offset_internal(self.cursor, offset)) + } + + pub fn cursor_move_to_logical(&mut self, pos: Point) { + self.set_cursor(self.cursor_move_to_logical_internal(self.cursor, pos)) + } + + pub fn cursor_move_to_visual(&mut self, pos: Point) { + self.set_cursor(self.cursor_move_to_visual_internal(self.cursor, pos)) + } + + pub fn cursor_move_delta(&mut self, granularity: CursorMovement, delta: CoordType) { + self.set_cursor(self.cursor_move_delta_internal(self.cursor, granularity, delta)) + } + + fn set_cursor(&mut self, cursor: ucd::UcdCursor) { + self.set_cursor_internal(cursor); + self.last_history_type = HistoryType::CursorMovement; + self.selection = TextBufferSelection::None; + } + + fn set_cursor_for_selection(&mut self, cursor: ucd::UcdCursor) { + self.set_cursor_internal(cursor); + self.last_history_type = HistoryType::CursorMovement; + } + + fn set_cursor_internal(&mut self, cursor: ucd::UcdCursor) { + debug_assert!( + cursor.offset <= self.text_length() + && cursor.logical_pos.x >= 0 + && cursor.logical_pos.y >= 0 + && cursor.logical_pos.y <= self.stats.logical_lines + && cursor.visual_pos.x >= 0 + && cursor.visual_pos.x <= self.word_wrap_column + && cursor.visual_pos.y >= 0 + && cursor.visual_pos.y <= self.stats.visual_lines + ); + self.cursor = cursor; + } + + fn extract_raw(&self, mut beg: usize, mut end: usize, out: &mut Vec, mut out_off: usize) { + debug_assert!(beg <= end && 
end <= self.text_length()); + end = end.min(self.text_length()); + beg = beg.min(end); + if beg >= end { + return; + } + + out.reserve(end - beg); + + while beg < end { + let chunk = self.read_forward(beg); + let chunk = &chunk[..chunk.len().min(end - beg)]; + helpers::vec_insert_at(out, out_off, chunk); + beg += chunk.len(); + out_off += chunk.len(); + } + } + + /// Extracts a rectangular region of the text buffer and writes it to the framebuffer. + /// The `destination` rect is framebuffer coordinates. The extracted region within this + /// text buffer has the given `origin` and the same size as the `destination` rect. + pub fn render( + &mut self, + origin: Point, + destination: Rect, + show_cursor: bool, + fb: &mut Framebuffer, + ) { + if destination.is_empty() { + return; + } + + let width = destination.width(); + let height = destination.height(); + let line_number_width = self.margin_width.max(3) as usize - 3; + let text_width = width - self.margin_width; + let mut visualizer_buf = [0xE2, 0x90, 0x80]; // U+2400 in UTF8 + let mut line = String::new(); + let mut cursor = self.cursor_for_rendering.unwrap_or(self.cursor); + + let [selection_beg, selection_end] = match self.selection { + TextBufferSelection::None => [Point::MIN, Point::MIN], + TextBufferSelection::Active { beg, end } | TextBufferSelection::Done { beg, end } => { + helpers::minmax(beg, end) + } + }; + + line.reserve(width as usize * 2); + + for y in 0..height { + line.clear(); + + let visual_line = origin.y + y; + let mut cursor_beg = self.cursor_move_to_visual_internal( + cursor, + Point { + x: origin.x, + y: visual_line, + }, + ); + let cursor_end = self.cursor_move_to_visual_internal( + cursor_beg, + Point { + x: origin.x + text_width, + y: visual_line, + }, + ); + + // Accelerate the next render pass by remembering where we started off. 
+ if y == 0 { + self.cursor_for_rendering = Some(cursor_beg); + } + + if line_number_width != 0 { + if (self.word_wrap_column == CoordType::MAX || cursor_beg.logical_pos.x == 0) + && visual_line < self.stats.visual_lines + { + _ = write!( + line, + "{:1$} │ ", + cursor_beg.logical_pos.y + 1, + line_number_width + ); + } else { + // Place " | " at the beginning of the line. + // Since we know that we won't see line numbers greater than i64::MAX (9223372036854775807) + // any time soon, we can use a static string as the template (`MARGIN`) and slice it, + // because `line_number_width` can't possibly be larger than 19. + let off = 19 - line_number_width; + unsafe { std::hint::assert_unchecked(off < MARGIN_TEMPLATE.len()) }; + line.push_str(&MARGIN_TEMPLATE[off..]); + } + } + + // Nothing to do if the entire line is empty. + if cursor_beg.offset != cursor_end.offset { + // If we couldn't reach the left edge, we may have stopped short due to a wide glyph. + // In that case we'll try to find the next character and then compute by how many + // columns it overlaps the left edge (can be anything between 1 and 7). 
+ if cursor_beg.visual_pos.x < origin.x { + let cursor_next = self.cursor_move_to_logical_internal( + cursor_beg, + Point { + x: cursor_beg.logical_pos.x + 1, + y: cursor_beg.logical_pos.y, + }, + ); + + if cursor_next.visual_pos.x > origin.x { + let overlap = cursor_next.visual_pos.x - origin.x; + debug_assert!((1..=7).contains(&overlap)); + line.push_str(&TAB_WHITESPACE[..overlap as usize]); + cursor_beg = cursor_next; + } + } + + fn find_control_char(text: &[u8], mut offset: usize) -> usize { + while offset < text.len() && (text[offset] >= 0x20 && text[offset] != 0x7f) { + offset += 1; + } + offset + } + + let mut global_off = cursor_beg.offset; + let mut cursor_tab = cursor_beg; + + while global_off < cursor_end.offset { + let chunk = self.read_forward(global_off); + let chunk = &chunk[..chunk.len().min(cursor_end.offset - global_off)]; + + let mut chunk_off = 0; + while chunk_off < chunk.len() { + let beg = chunk_off; + chunk_off = find_control_char(chunk, beg); + + for chunk in chunk[beg..chunk_off].utf8_chunks() { + if !chunk.valid().is_empty() { + line.push_str(chunk.valid()); + } + if !chunk.invalid().is_empty() { + line.push('\u{FFFD}'); + } + } + + while chunk_off < chunk.len() + && (chunk[chunk_off] < 0x20 || chunk[chunk_off] == 0x7f) + { + let ch = chunk[chunk_off]; + chunk_off += 1; + + if ch == b'\t' { + cursor_tab = self.cursor_move_to_offset_internal( + cursor_tab, + global_off + chunk_off - 1, + ); + let tab_size = self.tab_size - (cursor_tab.column % self.tab_size); + line.push_str(&TAB_WHITESPACE[..tab_size as usize]); + + // Since we know that we just aligned ourselves to the next tab stop, + // we can trivially process any successive tabs. 
+ while chunk_off < chunk.len() && chunk[chunk_off] == b'\t' { + line.push_str(&TAB_WHITESPACE[..self.tab_size as usize]); + chunk_off += 1; + } + continue; + } + + visualizer_buf[2] = if ch == 0x7F { + 0xA1 // U+2421 + } else { + 0x80 | ch // 0x00..=0x1F => U+2400..=U+241F + }; + // Our manually constructed UTF8 is never going to be invalid. Trust. + line.push_str(unsafe { str::from_utf8_unchecked(&visualizer_buf) }); + } + } + + global_off += chunk.len(); + } + } + + fb.replace_text( + destination.top + y, + destination.left, + destination.right, + &line, + ); + + // Draw the selection on this line, if any. + if cursor_beg.logical_pos < selection_end && cursor_end.logical_pos > selection_beg { + // By default, we assume the entire line is selected. + let mut beg = 0; + let mut end = COORD_TYPE_SAFE_MAX; + let mut cursor = cursor_beg; + + // The start of the selection is within this line. We need to update selection_beg. + if selection_beg > cursor_beg.logical_pos && selection_beg <= cursor_end.logical_pos + { + cursor = self.cursor_move_to_logical_internal(cursor, selection_beg); + beg = cursor.visual_pos.x; + debug_assert_eq!(cursor.visual_pos.y, cursor_beg.visual_pos.y); + } + + // The end of the selection is within this line. We need to update selection_end. 
+ if selection_end > cursor_beg.logical_pos && selection_end <= cursor_end.logical_pos + { + cursor = self.cursor_move_to_logical_internal(cursor, selection_end); + end = cursor.visual_pos.x; + debug_assert_eq!(cursor.visual_pos.y, cursor_beg.visual_pos.y); + } + + beg = beg.max(origin.x); + end = end.min(origin.x + text_width); + + let left = destination.left + self.margin_width - origin.x; + let top = destination.top + y; + let rect = Rect { + left: left + beg, + top, + right: left + end, + bottom: top + 1, + }; + + fb.blend_bg(rect, fb.indexed(IndexedColor::DefaultForeground)); + fb.blend_fg(rect, fb.indexed(IndexedColor::DefaultBackground)); + } + + cursor = cursor_end; + } + + // Colorize the margin that we wrote above. + if self.margin_width > 0 { + let margin = Rect { + left: destination.left, + top: destination.top, + right: destination.left + self.margin_width, + bottom: destination.bottom, + }; + fb.blend_fg(margin, 0x7f7f7f7f); + } + + if self.ruler > 0 && self.ruler < CoordType::MAX { + let left = destination.left + self.margin_width + (self.ruler - origin.x).max(0); + let right = destination.right; + if left < right { + fb.blend_bg( + Rect { + left, + top: destination.top, + right, + bottom: destination.bottom, + }, + fb.indexed(IndexedColor::BrightRed) & 0x1fffffff, + ); + } + } + + if show_cursor { + let text = Rect { + left: destination.left + self.margin_width, + top: destination.top, + right: destination.right, + bottom: destination.bottom, + }; + let cursor = Point { + x: self.cursor.visual_pos.x - origin.x + destination.left + self.margin_width, + y: self.cursor.visual_pos.y - origin.y + destination.top, + }; + if text.contains(cursor) { + fb.set_cursor(cursor, self.overtype); + } + } + } + + /// Inserts `text` at the current cursor position. + /// + /// If there's a current selection, it will be replaced. + /// The selection is cleared after the call. 
+ pub fn write(&mut self, text: &[u8]) { + if text.is_empty() { + return; + } + + if self.selection.is_some() { + if let Some((beg, end)) = self.extract_selection_range() { + self.edit_begin(HistoryType::Write, beg); + self.edit_delete(end); + self.selection = TextBufferSelection::None; + } + } + if self.active_edit_depth <= 0 { + self.edit_begin(HistoryType::Write, self.cursor); + } + + let mut offset = 0; + let mut newline_buffer = String::new(); + + loop { + let (offset_next, _) = ucd::newlines_forward(text, offset, 0, 1); + + let mut line = &text[offset..offset_next]; + // Trim trailing LF or CRLF, if any. + if line.ends_with(b"\n") { + line = &line[..line.len() - 1] + } + if line.ends_with(b"\r") { + line = &line[..line.len() - 1] + } + + let column_before = self.cursor.logical_pos.x; + + // Write the contents of the line into the buffer. + let mut line_off = 0; + while line_off < line.len() { + // Split the line into chunks of non-tabs and tabs. + let mut plain = line; + if !self.indent_with_tabs { + let end = memchr2(b'\t', b'\t', line, line_off); + plain = &line[line_off..end]; + } + + // Non-tabs are written as-is, because the outer loop already handles newline translation. + self.edit_write(plain); + line_off += plain.len(); + + // Now replace tabs with spaces. + while line_off < line.len() && line[line_off] == b'\t' { + let spaces = self.tab_size - (self.cursor.column % self.tab_size); + let spaces = &TAB_WHITESPACE.as_bytes()[..spaces as usize]; + self.edit_write(spaces); + line_off += 1; + } + } + + if self.overtype { + let delete = self.cursor.logical_pos.x - column_before; + let end = self.cursor_move_to_logical_internal( + self.cursor, + Point { + x: self.cursor.logical_pos.x + delete, + y: self.cursor.logical_pos.y, + }, + ); + self.edit_delete(end); + } + + offset += line.len(); + if offset >= text.len() { + break; + } + + // We'll give the next line the same indentation as the previous one. + // This block figures out how much that is. 
We can't reuse that value, + // because " a\n a\n" should give the 3rd line a total indentation of 4. + // Assuming your terminal has bracketed paste, this won't be a concern though. + // (If it doesn't, use a different terminal.) + let tab_size = self.tab_size as usize; + let mut newline_indentation = 0usize; + { + let line_beg = self.goto_line_start(self.cursor, self.cursor.logical_pos.y); + let limit = self.cursor.offset; + let mut off = line_beg.offset; + + 'outer: while off < limit { + let chunk = self.read_forward(off); + let chunk = &chunk[..chunk.len().min(limit - off)]; + + for &c in chunk { + if c == b' ' { + newline_indentation += 1; + } else if c == b'\t' { + newline_indentation += tab_size - (newline_indentation % tab_size); + } else { + break 'outer; + } + } + + off += chunk.len(); + } + } + + // First, write the newline. + newline_buffer.clear(); + newline_buffer.push_str(if self.newlines_are_crlf { "\r\n" } else { "\n" }); + + // If tabs are enabled, add as many tabs as we can. + if self.indent_with_tabs { + let tab_count = newline_indentation / tab_size; + helpers::string_append_repeat(&mut newline_buffer, '\t', tab_count); + newline_indentation -= tab_count * tab_size; + } + + // If tabs are disabled, or if the indentation wasn't a multiple of the tab size, + // add spaces to make up the difference. + helpers::string_append_repeat(&mut newline_buffer, ' ', newline_indentation); + + self.edit_write(newline_buffer.as_bytes()); + + offset = offset_next; + if offset >= text.len() { + break; + } + } + + self.edit_end(); + } + + /// Deletes 1 grapheme cluster from the buffer. + /// `cursor_movements` is expected to be -1 for backspace and 1 for delete. + /// If there's a current selection, it will be deleted and `cursor_movements` ignored. + /// The selection is cleared after the call. + /// Deletes characters from the buffer based on a delta from the cursor. 
+ pub fn delete(&mut self, granularity: CursorMovement, delta: CoordType) { + debug_assert!(delta == -1 || delta == 1); + + let beg; + let end; + + if self.selection.is_some() { + (beg, end) = match self.extract_selection_range() { + Some(v) => v, + None => return, + }; + self.selection = TextBufferSelection::None; + } else { + if (delta == -1 && self.cursor.offset == 0) + || (delta == 1 && self.cursor.offset >= self.text_length()) + { + // Nothing to delete. + return; + } + + beg = self.cursor; + end = self.cursor_move_delta_internal(beg, granularity, delta); + if beg.offset == end.offset { + return; + } + } + + self.edit_begin(HistoryType::Delete, beg); + self.edit_delete(end); + self.edit_end(); + } + + /// Extracts a chunk of text or a line if no selection is active. May optionally delete it. + pub fn extract_selection(&mut self, delete: bool) -> Vec { + let Some((beg, end)) = self.extract_selection_range() else { + return Vec::new(); + }; + + let mut out = Vec::new(); + self.extract_raw(beg.offset, end.offset, &mut out, 0); + + if delete && !out.is_empty() { + self.edit_begin(HistoryType::Delete, beg); + self.edit_delete(end); + self.edit_end(); + self.selection = TextBufferSelection::None; + } + + out + } + + fn extract_selection_range(&self) -> Option<(ucd::UcdCursor, ucd::UcdCursor)> { + let (beg, end) = match self.selection { + TextBufferSelection::None => { + // If there's no selection, editors commonly copy the current line. 
+ ( + Point { + x: 0, + y: self.cursor.logical_pos.y, + }, + Point { + x: 0, + y: self.cursor.logical_pos.y + 1, + }, + ) + } + TextBufferSelection::Active { beg, end } | TextBufferSelection::Done { beg, end } => { + (beg.min(end), beg.max(end)) + } + }; + + let beg = self.cursor_move_to_logical_internal(self.cursor, beg); + let end = self.cursor_move_to_logical_internal(beg, end); + + if beg.offset < end.offset { + Some((beg, end)) + } else { + None + } + } + + fn edit_begin(&mut self, history_type: HistoryType, cursor: ucd::UcdCursor) { + self.active_edit_depth += 1; + if self.active_edit_depth > 1 { + return; + } + + let cursor_before = self.cursor; + self.set_cursor_internal(cursor); + + // If both the last and this are a Write operation, we skip allocating a new undo history item. + if !(self.last_history_type == HistoryType::Write && history_type == HistoryType::Write) + && !(self.last_history_type == HistoryType::Delete + && history_type == HistoryType::Delete) + { + self.redo_stack.clear(); + while self.undo_stack.len() > 1000 { + self.undo_stack.pop_front(); + } + + self.last_history_type = history_type; + self.active_edit_off = cursor.offset; + self.undo_stack.push_back(HistoryEntry { + cursor_before: cursor_before.logical_pos, + selection_before: self.selection, + stats_before: self.stats, + generation_before: self.buffer.generation, + cursor: cursor.logical_pos, + deleted: Vec::new(), + added: Vec::new(), + }); + } + + // If word-wrap is enabled, the visual layout of all logical lines affected by the write + // may have changed. This includes even text before the insertion point up to the line + // start, because this write may have joined with a word before the initial cursor. + // See other uses of `word_wrap_cursor_next_line` in this function. 
+ if self.word_wrap_column != CoordType::MAX { + let safe_start = self.goto_line_start(cursor_before, cursor_before.logical_pos.y); + let next_line = self.cursor_move_to_logical_internal( + cursor_before, + Point { + x: 0, + y: cursor_before.logical_pos.y + 1, + }, + ); + self.active_edit_line_info = Some(ActiveEditLineInfo { + safe_start, + line_height_in_rows: next_line.visual_pos.y - safe_start.visual_pos.y, + distance_next_line_start: next_line.offset - cursor_before.offset, + }); + } + } + + fn edit_write(&mut self, text: &[u8]) { + let logical_y_before = self.cursor.logical_pos.y; + + // Copy the written portion into the undo entry. + { + let undo = self.get_last_undo_mut(); + undo.added.extend_from_slice(text); + } + + // Write! + { + let gap = self + .buffer + .allocate_gap(self.active_edit_off, text.len(), 0); + gap.copy_from_slice(text); + self.buffer.commit_gap(text.len()); + } + + // Move self.cursor to the end of the newly written text. Can't use `self.set_cursor_internal`, + // because we're still in the progress of recalculating the line stats. + self.active_edit_off += text.len(); + self.cursor = self.cursor_move_to_offset_internal(self.cursor, self.active_edit_off); + self.stats.logical_lines += self.cursor.logical_pos.y - logical_y_before; + } + + fn edit_delete(&mut self, to: ucd::UcdCursor) { + let logical_y_before = self.cursor.logical_pos.y; + let backward = to.offset < self.active_edit_off; + let [beg, end] = helpers::minmax(self.active_edit_off, to.offset); + + // Copy the deleted portion into the undo entry. + { + let undo = self.get_last_undo_mut(); + let deleted = trust_me_bro::this_lifetime_change_is_totally_safe_mut(&mut undo.deleted); + self.extract_raw(beg, end, deleted, if backward { 0 } else { deleted.len() }); + } + + // Delete the portion from the buffer by enlarging the gap. 
+ { + let count = end - beg; + self.buffer.allocate_gap(beg, 0, count); + self.buffer.commit_gap(0); + } + + // Move self.cursor to the beginning of the deleted text. + // This is only relevant for backward deletions. + if backward { + let undo = self.get_last_undo_mut(); + undo.cursor = to.logical_pos; + self.cursor = to; + } + + self.active_edit_off = beg; + self.stats.logical_lines += logical_y_before - to.logical_pos.y; + } + + fn edit_end(&mut self) { + self.active_edit_depth -= 1; + assert!(self.active_edit_depth >= 0); + if self.active_edit_depth > 0 { + return; + } + + #[cfg(debug_assertions)] + { + let entry = self.get_last_undo_mut(); + debug_assert!(!entry.deleted.is_empty() || !entry.added.is_empty()); + } + + if let Some(info) = self.active_edit_line_info.take() { + let entry = self.get_last_undo_mut(); + let deleted_count = entry.deleted.len(); + let target = self.cursor.logical_pos; + + // From our safe position we can measure the actual visual position of the cursor. + self.set_cursor_internal(self.cursor_move_to_logical_internal(info.safe_start, target)); + + // If content is added at the insertion position, that's not a problem: + // We can just remeasure the height of this one line and calculate the delta. + // `deleted_count` is 0 in this case. + // + // The problem is when content is deleted, because it may affect lines + // beyond the end of the `next_line`. In that case we have to measure + // the entire buffer contents until the end to compute `self.stats.visual_lines`. + if deleted_count < info.distance_next_line_start { + // Now we can measure how many more visual rows this logical line spans. 
+ let next_line = self.cursor_move_to_logical_internal( + self.cursor, + Point { + x: 0, + y: target.y + 1, + }, + ); + let lines_before = info.line_height_in_rows; + let lines_after = next_line.visual_pos.y - info.safe_start.visual_pos.y; + self.stats.visual_lines += lines_after - lines_before; + } else { + let end = self.cursor_move_to_logical_internal(self.cursor, Point::MAX); + self.stats.visual_lines = end.visual_pos.y + 1; + } + } else { + // If word-wrap is disabled the visual line count always matches the logical one. + self.stats.visual_lines = self.stats.logical_lines; + } + + self.search = None; + + // Also takes care of clearing `cursor_for_rendering`. + self.reflow(false); + } + + pub fn undo(&mut self) { + let undo_stack = + trust_me_bro::this_lifetime_change_is_totally_safe_mut(&mut self.undo_stack); + let redo_stack = + trust_me_bro::this_lifetime_change_is_totally_safe_mut(&mut self.redo_stack); + self.undo_redo(undo_stack, redo_stack); + } + + pub fn redo(&mut self) { + let redo_stack = + trust_me_bro::this_lifetime_change_is_totally_safe_mut(&mut self.redo_stack); + let undo_stack = + trust_me_bro::this_lifetime_change_is_totally_safe_mut(&mut self.undo_stack); + self.undo_redo(redo_stack, undo_stack); + } + + fn get_last_undo_mut(&mut self) -> &mut HistoryEntry { + self.undo_stack.back_mut().expect("undo_stack is empty") + } + + fn undo_redo( + &mut self, + from: &mut LinkedList, + to: &mut LinkedList, + ) { + let len = from.len(); + if len == 0 { + return; + } + + let mut tail = from.split_off(len - 1); + to.append(&mut tail); + let change = to.back_mut().unwrap(); + + // Undo: Whatever was deleted is now added and vice versa. + mem::swap(&mut change.deleted, &mut change.added); + + // Move to the point where the modification took place. 
+ let cursor = self.cursor_move_to_logical_internal(self.cursor, change.cursor); + + let safe_cursor = if self.word_wrap_column != CoordType::MAX { + // If word-wrap is enabled, we need to move the cursor to the beginning of the line. + // This is because the undo/redo operation may have changed the visual position of the cursor. + self.goto_line_start(cursor, cursor.logical_pos.y) + } else { + cursor + }; + + // Delete the inserted portion and reinsert the deleted portion. + { + let deleted = change.deleted.len(); + let added = &change.added[..]; + let gap = self + .buffer + .allocate_gap(cursor.offset, added.len(), deleted); + gap.copy_from_slice(added); + self.buffer.commit_gap(added.len()); + } + + // Restore the previous line statistics. + mem::swap(&mut self.stats, &mut change.stats_before); + + // Restore the previous selection. + mem::swap(&mut self.selection, &mut change.selection_before); + + // Pretend as if the buffer was never modified. + mem::swap(&mut self.buffer.generation, &mut change.generation_before); + + // Restore the previous cursor. + let cursor_before = self.cursor_move_to_logical_internal(safe_cursor, change.cursor_before); + change.cursor_before = self.cursor.logical_pos; + self.set_cursor_internal(cursor_before); + + if self.undo_stack.is_empty() { + self.last_history_type = HistoryType::None; + } + } + + pub fn read_backward(&self, off: usize) -> &[u8] { + self.buffer.read_backward(off) + } + + pub fn read_forward(&self, off: usize) -> &[u8] { + self.buffer.read_forward(off) + } +} + +enum BackingBuffer { + VirtualMemory(*mut u8, usize), + Vec(Vec), +} + +impl Drop for BackingBuffer { + fn drop(&mut self) { + unsafe { + if let BackingBuffer::VirtualMemory(ptr, reserve) = *self { + sys::virtual_release(ptr, reserve); + } + } + } +} + +/// Most people know how Vec works: It has some spare capacity at the end, +/// so that pushing into it doesn't reallocate every single time. 
A gap buffer +/// is the same thing, but the spare capacity can be anywhere in the buffer. +/// This variant is optimized for large buffers and uses virtual memory. +pub struct GapBuffer { + /// Pointer to the buffer. + text: *mut u8, + /// Maximum size of the buffer, including gap. + reserve: usize, + /// Size of the buffer, including gap. + commit: usize, + /// Length of the stored text, NOT including gap. + text_length: usize, + /// Gap offset. + gap_off: usize, + /// Gap length. + gap_len: usize, + /// Increments every time the buffer is modified. + generation: u32, + /// If `Vec(..)`, the buffer is optimized for small amounts of text + /// and uses the standard heap. Otherwise, it uses virtual memory. + buffer: BackingBuffer, +} + +impl GapBuffer { + fn new(small: bool) -> apperr::Result { + const RESERVE: usize = 2 * 1024 * 1024 * 1024; + + let buffer; + let text; + + if small { + let mut v = Vec::new(); + text = v.as_mut_ptr(); + buffer = BackingBuffer::Vec(v); + } else { + text = unsafe { sys::virtual_reserve(RESERVE)? 
}; + buffer = BackingBuffer::VirtualMemory(text, RESERVE); + } + + Ok(Self { + text, + reserve: RESERVE, + commit: 0, + text_length: 0, + gap_off: 0, + gap_len: 0, + generation: 0, + buffer, + }) + } + + fn len(&self) -> usize { + self.text_length + } + + fn allocate_gap(&mut self, off: usize, len: usize, delete: usize) -> &mut [u8] { + const LARGE_ALLOC_CHUNK: usize = 64 * 1024; + const LARGE_GAP_CHUNK: usize = 4 * 1024; + const SMALL_ALLOC_CHUNK: usize = 256; + const SMALL_GAP_CHUNK: usize = 16; + + // Sanitize parameters + let off = off.min(self.text_length); + let delete = delete.min(self.text_length - off); + + // Move the existing gap if it exists + if off != self.gap_off { + let gap_off = self.gap_off; + let gap_len = self.gap_len; + + if gap_len > 0 { + // + // v gap_off + // left: |ABCDEFGHIJKLMN OPQRSTUVWXYZ| + // |ABCDEFGHI JKLMNOPQRSTUVWXYZ| + // ^ off + // move: GLMNET + // + // v gap_off + // !left: |ABCDEFGHIJKLMN OPQRSTUVWXYZ| + // |ABCDEFGHIJKLMNOPQRS TUVWXYZ| + // ^ off + // move: OPPOSERS + // + let data = self.text; + let left = off < gap_off; + let move_src = if left { off } else { gap_off + gap_len }; + let move_dst = if left { off + gap_len } else { gap_off }; + let move_len = if left { gap_off - off } else { off - gap_off }; + + unsafe { ptr::copy(data.add(move_src), data.add(move_dst), move_len) }; + + if cfg!(debug_assertions) { + unsafe { slice::from_raw_parts_mut(data.add(off), gap_len).fill(0xCD) }; + } + } + + self.gap_off = off; + } + + // Delete the text + if cfg!(debug_assertions) { + unsafe { + slice::from_raw_parts_mut(self.text.add(off + self.gap_len), delete).fill(0xCD) + }; + } + self.gap_len += delete; + self.text_length -= delete; + + // Enlarge the gap if needed + if len > self.gap_len { + let gap_chunk; + let alloc_chunk; + + if matches!(self.buffer, BackingBuffer::VirtualMemory(..)) { + gap_chunk = LARGE_GAP_CHUNK; + alloc_chunk = LARGE_ALLOC_CHUNK; + } else { + gap_chunk = SMALL_GAP_CHUNK; + alloc_chunk = 
SMALL_ALLOC_CHUNK; + } + + let gap_len_old = self.gap_len; + let gap_len_new = (len + gap_chunk + gap_chunk - 1) & !(gap_chunk - 1); + let bytes_old = self.commit; + let bytes_new = self.text_length + gap_len_new; + + if bytes_new > bytes_old { + let bytes_new = (bytes_new + alloc_chunk - 1) & !(alloc_chunk - 1); + assert!(bytes_new <= self.reserve); + + match &mut self.buffer { + BackingBuffer::VirtualMemory(ptr, _) => unsafe { + sys::virtual_commit(ptr.add(bytes_old), bytes_new - bytes_old).expect("OOM") + }, + BackingBuffer::Vec(v) => { + v.resize(bytes_new, 0); + self.text = v.as_mut_ptr(); + } + } + + self.commit = bytes_new; + } + + let gap_beg = unsafe { self.text.add(off) }; + unsafe { + ptr::copy( + gap_beg.add(gap_len_old), + gap_beg.add(gap_len_new), + self.text_length - off, + ) + }; + + if cfg!(debug_assertions) { + unsafe { + slice::from_raw_parts_mut(gap_beg.add(gap_len_old), gap_len_new - gap_len_old) + .fill(0xCD) + }; + } + + self.gap_len = gap_len_new; + } + + self.generation = self.generation.wrapping_add(1); + unsafe { slice::from_raw_parts_mut(self.text.add(off), len) } + } + + fn commit_gap(&mut self, len: usize) { + assert!(len <= self.gap_len); + self.text_length += len; + self.gap_off += len; + self.gap_len -= len; + } + + fn clear(&mut self) { + self.gap_off = 0; + self.gap_len += self.text_length; + self.generation = self.generation.wrapping_add(1); + self.text_length = 0; + } + + /// Replaces the entire buffer contents with the given `text`. + /// The method is optimized for the case where the given `text` already matches + /// the existing contents. Returns `true` if the buffer contents were changed. + fn copy_from_str(&mut self, text: &str) -> bool { + let input = text.as_bytes(); + let max_common = self.text_length.min(input.len()); + let mut common = 0; + + // Find the position at which the contents change. 
+ while common < max_common { + let chunk = self.read_forward(common); + let cmp_len = chunk.len().min(max_common - common); + + if chunk[..cmp_len] != input[common..common + cmp_len] { + // Find the first differing byte. + common += chunk[..cmp_len] + .iter() + .zip(&input[common..common + cmp_len]) + .position(|(&a, &b)| a != b) + .unwrap_or(cmp_len); + break; + } + + common += cmp_len; + } + + // If the contents are identical, we're done. + if common == self.text_length && common == input.len() { + return false; + } + + // Update the buffer from the first differing byte. + let new = &input[common..]; + let gap = self.allocate_gap(common, new.len(), self.text_length - common); + gap.copy_from_slice(new); + self.commit_gap(new.len()); + true + } + + /// Copies the contents of the buffer into a string. + fn copy_into_string(&self, dst: &mut String) { + dst.clear(); + + let mut off = 0; + while off < self.text_length { + let chunk = self.read_forward(off); + dst.push_str(&String::from_utf8_lossy(chunk)); + off += chunk.len(); + } + } +} + +impl Document for GapBuffer { + fn read_backward(&self, off: usize) -> &[u8] { + let off = off.min(self.text_length); + let beg; + let len; + + if off <= self.gap_off { + // Cursor is before the gap: We can read until the beginning of the buffer. + beg = 0; + len = off; + } else { + // Cursor is after the gap: We can read until the end of the gap. + beg = self.gap_off + self.gap_len; + // The cursor_off doesn't account of the gap_len. + // (This allows us to move the gap without recalculating the cursor position.) + len = off - self.gap_off; + } + + unsafe { slice::from_raw_parts(self.text.add(beg), len) } + } + + fn read_forward(&self, off: usize) -> &[u8] { + let off = off.min(self.text_length); + let beg; + let len; + + if off < self.gap_off { + // Cursor is before the gap: We can read until the start of the gap. 
+ beg = off; + len = self.gap_off - off; + } else { + // Cursor is after the gap: We can read until the end of the buffer. + beg = off + self.gap_len; + len = self.text_length - off; + } + + unsafe { slice::from_raw_parts(self.text.add(beg), len) } + } +} + +pub enum Bom { + None, + UTF8, + UTF16LE, + UTF16BE, + UTF32LE, + UTF32BE, + GB18030, +} + +const BOM_MAX_LEN: usize = 4; + +fn detect_bom(bytes: &[u8]) -> Option<&'static str> { + if bytes.len() >= 4 { + if bytes.starts_with(b"\xFF\xFE\x00\x00") { + return Some("UTF-32LE"); + } + if bytes.starts_with(b"\x00\x00\xFE\xFF") { + return Some("UTF-32BE"); + } + if bytes.starts_with(b"\x84\x31\x95\x33") { + return Some("gb18030"); + } + } + if bytes.len() >= 3 && bytes.starts_with(b"\xEF\xBB\xBF") { + return Some("UTF-8"); + } + if bytes.len() >= 2 { + if bytes.starts_with(b"\xFF\xFE") { + return Some("UTF-16LE"); + } + if bytes.starts_with(b"\xFE\xFF") { + return Some("UTF-16BE"); + } + } + None +} diff --git a/src/edit.exe.manifest b/src/edit.exe.manifest new file mode 100644 index 0000000..f6582a5 --- /dev/null +++ b/src/edit.exe.manifest @@ -0,0 +1,22 @@ + + + + + true + UTF-8 + SegmentHeap + + + + + + + + diff --git a/src/framebuffer.rs b/src/framebuffer.rs new file mode 100644 index 0000000..c0d131b --- /dev/null +++ b/src/framebuffer.rs @@ -0,0 +1,536 @@ +use crate::helpers::{CoordType, Point, Rect, Size}; +use crate::{helpers, ucd}; +use std::fmt::Write; + +pub enum IndexedColor { + Black, + Red, + Green, + Yellow, + Blue, + Magenta, + Cyan, + White, + BrightBlack, + BrightRed, + BrightGreen, + BrightYellow, + BrightBlue, + BrightMagenta, + BrightCyan, + BrightWhite, + DefaultBackground, + DefaultForeground, +} + +pub const INDEXED_COLORS_COUNT: usize = 18; + +pub const DEFAULT_THEME: [u32; INDEXED_COLORS_COUNT] = [ + 0xff000000, 0xff212cbe, 0xff3aae3f, 0xff4a9abe, 0xffbe4d20, 0xffbe54bb, 0xffb2a700, 0xffbebebe, + 0xff808080, 0xff303eff, 0xff51ea58, 0xff44c9ff, 0xffff6a2f, 0xffff74fc, 0xfff0e100, 0xffffffff, 
+ 0xff000000, 0xffffffff, +]; + +pub struct Framebuffer { + indexed_colors: [u32; INDEXED_COLORS_COUNT], + size: Size, + lines: Vec, + bg_bitmap: Vec, + fg_bitmap: Vec, + auto_colors: [u32; 2], // [dark, light] + cursor: Point, + cursor_overtype: bool, +} + +impl Framebuffer { + pub fn new() -> Self { + Self { + indexed_colors: DEFAULT_THEME, + size: Size::default(), + lines: Vec::new(), + bg_bitmap: Vec::new(), + fg_bitmap: Vec::new(), + auto_colors: [0, 0], + cursor: Point { x: -1, y: -1 }, + cursor_overtype: false, + } + } + + pub fn set_indexed_colors(&mut self, colors: [u32; INDEXED_COLORS_COUNT]) { + self.indexed_colors = colors; + + self.auto_colors = [ + self.indexed_colors[IndexedColor::Black as usize], + self.indexed_colors[IndexedColor::BrightWhite as usize], + ]; + if !Self::quick_is_dark(self.auto_colors[0]) { + self.auto_colors.swap(0, 1); + } + } + + pub fn reset(&mut self, size: Size) { + let width = size.width as usize; + + if size != self.size { + let height = size.height as usize; + let area = width * height; + self.size = size; + self.lines = vec![String::new(); height]; + self.bg_bitmap = vec![0; area]; + self.fg_bitmap = vec![0; area]; + } + + let bg = self.indexed_colors[IndexedColor::DefaultBackground as usize]; + self.bg_bitmap.fill(bg); + self.fg_bitmap.fill(0); + self.cursor = Point { x: -1, y: -1 }; + + for l in &mut self.lines { + l.clear(); + l.reserve(width + width / 2); + helpers::string_append_repeat(l, ' ', width); + } + } + + /// Replaces text contents in a single line of the framebuffer. + /// All coordinates are in viewport coordinates. + /// Assumes that all tabs have been replaced with spaces. + /// + /// # Arguments + /// + /// * `y` - The y-coordinate of the line to replace. + /// * `origin_x` - The x-coordinate where the text should be inserted. + /// * `clip_right` - The x-coordinate past which the text will be clipped. + /// * `text` - The text to insert. + /// + /// # Returns + /// + /// The rectangle that was updated. 
    pub fn replace_text(
        &mut self,
        y: CoordType,
        origin_x: CoordType,
        clip_right: CoordType,
        text: &str,
    ) -> Rect {
        // A `y` outside of the framebuffer (including negative values, which wrap to
        // huge indices) yields no line and thus an empty rectangle.
        let Some(line) = self.lines.get_mut(y as usize) else {
            return Rect::default();
        };

        let bytes = text.as_bytes();
        let clip_right = clip_right.clamp(0, self.size.width);
        let layout_width = clip_right - origin_x;

        // Can't insert text that can't fit or is empty.
        if layout_width <= 0 || bytes.is_empty() {
            return Rect::default();
        }

        let mut cfg = ucd::MeasurementConfig::new(&bytes);

        // Check if the text intersects with the left edge of the framebuffer
        // and figure out the parts that are inside.
        let mut left = origin_x;
        if left < 0 {
            let cursor = cfg.goto_visual(Point { x: -left, y: 0 });
            left += cursor.visual_pos.x;

            if left < 0 && cursor.offset < text.len() {
                // `-left` must've intersected a wide glyph. Go to the next one.
                let cursor = cfg.goto_logical(Point {
                    x: cursor.logical_pos.x + 1,
                    y: 0,
                });
                left += cursor.visual_pos.x;
            }
        }

        // If the text still starts outside the framebuffer, we must've run out of text above.
        // Otherwise, if it starts outside the right edge to begin with, we can't insert it anyway.
        if left < 0 || left >= clip_right {
            return Rect::default();
        }

        // Measure the width of the new text (= `res_new.visual_pos.x`).
        let res_new = cfg.goto_visual(Point {
            x: layout_width,
            y: 0,
        });

        // Figure out at which byte offset the new text gets inserted.
        let right = left + res_new.visual_pos.x;
        let line_bytes = line.as_bytes();
        let mut cfg_old = ucd::MeasurementConfig::new(&line_bytes);
        let res_old_beg = cfg_old.goto_visual(Point { x: left, y: 0 });
        let res_old_end = cfg_old.goto_visual(Point { x: right, y: 0 });

        // If we intersect a wide glyph, we need to pad the new text with spaces.
        let mut str_new = &text[..res_new.offset];
        let mut str_buf = String::new();
        // Columns between the requested edge and the closest position
        // that the existing line contents could be split at.
        let overlap_beg = res_old_beg.visual_pos.x - left;
        let overlap_end = right - res_old_end.visual_pos.x;
        if overlap_beg > 0 || overlap_end > 0 {
            if overlap_beg > 0 {
                helpers::string_append_repeat(&mut str_buf, ' ', overlap_beg as usize);
            }
            str_buf.push_str(str_new);
            if overlap_end > 0 {
                helpers::string_append_repeat(&mut str_buf, ' ', overlap_end as usize);
            }
            str_new = &str_buf;
        }

        (*line).replace_range(res_old_beg.offset..res_old_end.offset, str_new);

        Rect {
            left,
            top: y,
            right,
            bottom: y + 1,
        }
    }

    /// Draws a vertical scrollbar into `track`, clipped to `clip_rect`.
    ///
    /// # Arguments
    ///
    /// * `clip_rect` - The area outside of which nothing is drawn.
    /// * `track` - The area of the scrollbar track. Its height doubles as the viewport height.
    /// * `content_offset` - The scroll offset of the content in rows.
    /// * `content_height` - The total height of the content in rows.
    pub fn draw_scrollbar(
        &mut self,
        clip_rect: Rect,
        track: Rect,
        content_offset: CoordType,
        content_height: CoordType,
    ) {
        if track.is_empty() {
            return;
        }

        let viewport_height = track.height();
        // The content height is at least the viewport height.
        let content_height = content_height.max(viewport_height);
        // The content offset must be at least one viewport height from the bottom.
        // You don't want to scroll past the end after all...
        let content_offset = content_offset.clamp(0, content_height - viewport_height);

        // In order to increase the visual resolution of the scrollbar,
        // we'll use 1/8th blocks to represent the thumb.
        // First, scale the offsets to get that 1/8th resolution.
        let viewport_height = viewport_height as i64 * 8;
        let content_offset = content_offset as i64 * 8;
        let content_height = content_height as i64 * 8;

        // The proportional thumb height (0-1) is the fraction of viewport and
        // content height. The taller the content, the smaller the thumb:
        // = viewport_height / content_height
        //
        // We then scale that to the viewport height to get the height in 1/8th units.
        // = viewport_height * viewport_height / content_height
        //
        // We add content_height/2 to round the integer division, which results in a numerator of:
        // = viewport_height * viewport_height + content_height / 2
        //
        // Finally we add +1 to round up the division if `content_height` is uneven. This ensures that
        // in case of a rounding issue, we'll make the track too large and clamp it to the track size.
        let thumb_numerator = viewport_height * viewport_height + content_height / 2 + 1;
        let thumb_height = thumb_numerator / content_height;
        // Ensure the thumb has a minimum size of 1 row.
        let thumb_height = thumb_height.max(8);

        // The proportional thumb top position (0-1) is naturally:
        // = content_offset / content_height
        //
        // The bottom position is 1 viewport-height below the top position:
        // = (viewport_height + content_offset) / content_height
        //
        // Since everything must be scaled to the 1/8th units we must multiply by viewport_height:
        // = viewport_height * (viewport_height + content_offset) / content_height
        // = (viewport_height * viewport_height + viewport_height * content_offset) / content_height
        //
        // And we also want that rounded integer division as before. This transforms the
        // `viewport_height * viewport_height` portion into the `thumb_numerator` above.
        // = (thumb_numerator + viewport_height * content_offset) / content_height
        //
        let thumb_bottom = (viewport_height * content_offset + thumb_numerator) / content_height;
        // Now that the bottom is flush with the bottom of the track, we can calculate the top.
        let thumb_top = (thumb_bottom - thumb_height).max(0);

        // Calculate the height of the top/bottom cell of the thumb.
        let top_fract = (thumb_top % 8) as CoordType;
        let bottom_fract = (thumb_bottom % 8) as CoordType;

        // Shift to absolute coordinates.
        // The top is rounded up and the bottom down, so that the range in between
        // only covers cells that are fully filled by the thumb.
        let thumb_top = ((thumb_top + 7) / 8) as CoordType + track.top;
        let thumb_bottom = (thumb_bottom / 8) as CoordType + track.top;

        let track_clipped = track.intersect(clip_rect);

        // Clamp to the visible area.
        let thumb_top_clipped = thumb_top.max(track_clipped.top);
        let thumb_bottom_clipped = thumb_bottom.min(track_clipped.bottom);

        self.blend_bg(track_clipped, self.indexed(IndexedColor::BrightBlack));
        self.blend_fg(track_clipped, self.indexed(IndexedColor::BrightWhite));

        // Draw the full blocks.
        for y in thumb_top_clipped..thumb_bottom_clipped {
            self.replace_text(y, track_clipped.left, track_clipped.right, "█");
        }

        // Draw the top/bottom cell of the thumb.
        // U+2581 to U+2588, 1/8th block to 8/8th block elements glyphs: ▁▂▃▄▅▆▇█
        // In UTF8: E2 96 81 to E2 96 88
        //
        // SAFETY (for both `from_utf8_unchecked` calls below): The fractions are remainders
        // of a division by 8 of non-negative values and are checked to be non-zero, so the
        // last byte is within 0x81..=0x87 and the buffer is one of the sequences listed above.
        let mut fract_buf = [0xE2, 0x96, 0x88];
        if top_fract != 0 {
            fract_buf[2] = (0x88 - top_fract) as u8;
            self.replace_text(
                thumb_top_clipped - 1,
                track_clipped.left,
                track_clipped.right,
                unsafe { std::str::from_utf8_unchecked(&fract_buf) },
            );
        }
        if bottom_fract != 0 {
            fract_buf[2] = (0x88 - bottom_fract) as u8;
            let rect = self.replace_text(
                thumb_bottom_clipped,
                track_clipped.left,
                track_clipped.right,
                unsafe { std::str::from_utf8_unchecked(&fract_buf) },
            );
            // The block glyphs fill a cell from the bottom up, but this cell must be filled
            // from the top down. Swapping the colors of the cell achieves that.
            self.blend_bg(rect, self.indexed(IndexedColor::BrightWhite));
            self.blend_fg(rect, self.indexed(IndexedColor::BrightBlack));
        }
    }

    /// Returns the color that the given palette entry currently maps to.
    #[inline]
    pub fn indexed(&self, index: IndexedColor) -> u32 {
        self.indexed_colors[index as usize]
    }

    /// Blends a background color over the given rectangular area.
    pub fn blend_bg(&mut self, target: Rect, bg: u32) {
        Self::alpha_blend_rect(&mut self.bg_bitmap[..], target, self.size, bg);
    }

    /// Blends a foreground color over the given rectangular area.
+ pub fn blend_fg(&mut self, target: Rect, fg: u32) { + if fg != 0 { + Self::alpha_blend_rect(&mut self.fg_bitmap[..], target, self.size, fg); + } else { + self.blend_rect_auto(target); + } + } + + /// Performs alpha blending on a rectangle inside the destination bitmap. + fn alpha_blend_rect(dst: &mut [u32], rect: Rect, size: Size, src: u32) { + let width = size.width; + let height = size.height; + let left = rect.left.clamp(0, width); + let right = rect.right.clamp(0, width); + let top = rect.top.clamp(0, height); + let bottom = rect.bottom.clamp(0, height); + + if left >= right || top >= bottom { + return; + } + + if (src & 0xff000000) == 0xff000000 { + for y in top..bottom { + let beg = (y * width + left) as usize; + let end = (y * width + right) as usize; + dst[beg..end].fill(src); + } + } else if (src & 0xff000000) != 0x00000000 { + for y in top..bottom { + let beg = (y * width + left) as usize; + let end = (y * width + right) as usize; + let mut off = beg; + + while { + let color = dst[off]; + + // Chunk into runs of the same color, so that we only call alpha_blend once per run. + let chunk_beg = off; + while { + off += 1; + off < end && dst[off] == color + } {} + let chunk_end = off; + + let color = Self::mix(color, src, 1.0, 1.0); + dst[chunk_beg..chunk_end].fill(color); + + off < end + } {} + } + } + } + + fn blend_rect_auto(&mut self, rect: Rect) { + let width = self.size.width; + let height = self.size.height; + let left = rect.left.clamp(0, width); + let right = rect.right.clamp(0, width); + let top = rect.top.clamp(0, height); + let bottom = rect.bottom.clamp(0, height); + + if left >= right || top >= bottom { + return; + } + + for y in top..bottom { + let beg = (y * width + left) as usize; + let end = (y * width + right) as usize; + let mut off = beg; + + while { + let bg = self.bg_bitmap[off]; + + // Chunk into runs of the same color, so that we only call Self::quick_is_dark once per run. 
+ let chunk_beg = off; + while { + off += 1; + off < end && self.bg_bitmap[off] == bg + } {} + let chunk_end = off; + + let fg = self.auto_colors[Self::quick_is_dark(bg) as usize]; + self.fg_bitmap[chunk_beg..chunk_end].fill(fg); + + off < end + } {} + } + } + + fn mix(dst: u32, src: u32, dst_balance: f32, src_balance: f32) -> u32 { + let src_r = Self::srgb_to_linear(src & 0xff); + let src_g = Self::srgb_to_linear((src >> 8) & 0xff); + let src_b = Self::srgb_to_linear((src >> 16) & 0xff); + let src_a = (src >> 24) as f32 / 255.0f32; + let src_a = src_a * dst_balance; + + let dst_r = Self::srgb_to_linear(dst & 0xff); + let dst_g = Self::srgb_to_linear((dst >> 8) & 0xff); + let dst_b = Self::srgb_to_linear((dst >> 16) & 0xff); + let dst_a = (dst >> 24) as f32 / 255.0f32; + let dst_a = dst_a * src_balance; + + let out_a = src_a + dst_a * (1.0f32 - src_a); + let out_r = (src_r * src_a + dst_r * dst_a * (1.0f32 - src_a)) / out_a; + let out_g = (src_g * src_a + dst_g * dst_a * (1.0f32 - src_a)) / out_a; + let out_b = (src_b * src_a + dst_b * dst_a * (1.0f32 - src_a)) / out_a; + + (((out_a * 255.0f32) as u32) << 24) + | (Self::linear_to_srgb(out_b) << 16) + | (Self::linear_to_srgb(out_g) << 8) + | Self::linear_to_srgb(out_r) + } + + fn srgb_to_linear(c: u32) -> f32 { + let fc = c as f32 / 255.0f32; + if fc <= 0.04045f32 { + fc / 12.92f32 + } else { + ((fc + 0.055f32) / 1.055f32).powf(2.4f32) + } + } + + fn linear_to_srgb(c: f32) -> u32 { + if c <= 0.0031308f32 { + (c * 12.92f32 * 255.0f32) as u32 + } else { + ((1.055f32 * c.powf(1.0f32 / 2.4f32) - 0.055f32) * 255.0f32) as u32 + } + } + + fn quick_is_dark(c: u32) -> bool { + let r = c & 0xff; + let g = (c >> 8) & 0xff; + let b = (c >> 16) & 0xff; + // Rough approximation of the sRGB luminance Y = 0.2126 R + 0.7152 G + 0.0722 B. 
+ let l = r * 3 + g * 10 + b; + l < 128 * 14 + } + + pub fn set_cursor(&mut self, pos: Point, overtype: bool) { + self.cursor = pos; + self.cursor_overtype = overtype; + } + + pub fn render(&mut self) -> String { + let mut result = String::new(); + result.push_str("\x1b[H"); + + let mut last_bg = self.bg_bitmap[0]; + let mut last_fg = self.fg_bitmap[0]; + // Invert the colors to force a color change on the first cell. + last_bg ^= 1; + last_fg ^= 1; + + for y in 0..self.size.height { + if y != 0 { + result.push_str("\r\n"); + } + + let line = &self.lines[y as usize][..]; + let line_bytes = line.as_bytes(); + let mut cfg = ucd::MeasurementConfig::new(&line_bytes); + + for x in 0..self.size.width { + let bg = self.bg_bitmap[(y * self.size.width + x) as usize]; + let fg = self.fg_bitmap[(y * self.size.width + x) as usize]; + if bg == last_bg && fg == last_fg { + continue; + } + + if x != 0 { + let beg = cfg.cursor().offset; + let end = cfg.goto_visual(Point { x, y: 0 }).offset; + result.push_str(&line[beg..end]); + } + + if last_bg != bg { + last_bg = bg; + _ = write!( + result, + "\x1b[48;2;{};{};{}m", + bg & 0xff, + (bg >> 8) & 0xff, + (bg >> 16) & 0xff + ); + } + + if last_fg != fg { + last_fg = fg; + _ = write!( + result, + "\x1b[38;2;{};{};{}m", + fg & 0xff, + (fg >> 8) & 0xff, + (fg >> 16) & 0xff + ); + } + } + + result.push_str(&line[cfg.cursor().offset..]); + } + + if self.cursor.x >= 0 && self.cursor.y >= 0 { + // CUP to the cursor position. + // DECSCUSR to set the cursor style. + // DECTCEM to show the cursor. + _ = write!( + result, + "\x1b[{};{}H\x1b[{} q\x1b[?25h", + self.cursor.y + 1, + self.cursor.x + 1, + if self.cursor_overtype { 1 } else { 5 } + ); + } else { + // DECTCEM to hide the cursor. 
+ result.push_str("\x1b[?25l"); + } + + result + } +} + +pub fn mix(dst: u32, src: u32, balance: f32) -> u32 { + Framebuffer::mix(dst, src, 1.0 - balance, balance) +} diff --git a/src/fuzzy.rs b/src/fuzzy.rs new file mode 100644 index 0000000..79db73b --- /dev/null +++ b/src/fuzzy.rs @@ -0,0 +1,234 @@ +//! Fuzzy search algorithm based on the one used in VS Code (`/src/vs/base/common/fuzzyScorer.ts`). +//! Other algorithms exist, such as Sublime Text's, or the one used in `fzf`, +//! but I figured that this one is what lots of people may be familiar with. + +use crate::icu; + +pub type FuzzyScore = (i32, Vec); + +const NO_MATCH: i32 = 0; +const NO_SCORE: FuzzyScore = (NO_MATCH, Vec::new()); + +pub fn score_fuzzy(target: &str, query: &str, allow_non_contiguous_matches: bool) -> FuzzyScore { + if target.is_empty() || query.is_empty() { + return NO_SCORE; // return early if target or query are empty + } + + let target_lower = icu::fold_case(target); + let query_lower = icu::fold_case(query); + let target: Vec = target.chars().collect(); + let target_lower: Vec = target_lower.chars().collect(); + let query: Vec = query.chars().collect(); + let query_lower: Vec = query_lower.chars().collect(); + + if target.len() < query.len() { + return NO_SCORE; // impossible for query to be contained in target + } + + do_score_fuzzy( + &query, + &query_lower, + &target, + &target_lower, + allow_non_contiguous_matches, + ) +} + +fn do_score_fuzzy( + query: &[char], + query_lower: &[char], + target: &[char], + target_lower: &[char], + allow_non_contiguous_matches: bool, +) -> FuzzyScore { + let mut scores = vec![0; query.len() * target.len()]; + let mut matches = vec![0; query.len() * target.len()]; + + // + // Build Scorer Matrix: + // + // The matrix is composed of query q and target t. For each index we score + // q[i] with t[i] and compare that with the previous score. If the score is + // equal or larger, we keep the match. 
In addition to the score, we also keep + // the length of the consecutive matches to use as boost for the score. + // + // t a r g e t + // q + // u + // e + // r + // y + // + for query_index in 0..query.len() { + let query_index_offset = query_index * target.len(); + let query_index_previous_offset = if query_index > 0 { + (query_index - 1) * target.len() + } else { + 0 + }; + + for target_index in 0..target.len() { + let current_index = query_index_offset + target_index; + let diag_index = if query_index > 0 && target_index > 0 { + query_index_previous_offset + target_index - 1 + } else { + 0 + }; + let left_score = if target_index > 0 { + scores[current_index - 1] + } else { + 0 + }; + let diag_score = if query_index > 0 && target_index > 0 { + scores[diag_index] + } else { + 0 + }; + let matches_sequence_len = if query_index > 0 && target_index > 0 { + matches[diag_index] + } else { + 0 + }; + + // If we are not matching on the first query character any more, we only produce a + // score if we had a score previously for the last query index (by looking at the diagScore). + // This makes sure that the query always matches in sequence on the target. For example + // given a target of "ede" and a query of "de", we would otherwise produce a wrong high score + // for query[1] ("e") matching on target[0] ("e") because of the "beginning of word" boost. 
+ let score = if diag_score == 0 && query_index != 0 { + 0 + } else { + compute_char_score( + query[query_index], + query_lower[query_index], + if target_index != 0 { + Some(target[target_index - 1]) + } else { + None + }, + target[target_index], + target_lower[target_index], + matches_sequence_len, + ) + }; + + // We have a score and its equal or larger than the left score + // Match: sequence continues growing from previous diag value + // Score: increases by diag score value + let is_valid_score = score != 0 && diag_score + score >= left_score; + if is_valid_score + && ( + // We don't need to check if it's contiguous if we allow non-contiguous matches + allow_non_contiguous_matches || + // We must be looking for a contiguous match. + // Looking at an index higher than 0 in the query means we must have already + // found out this is contiguous otherwise there wouldn't have been a score + query_index > 0 || + // lastly check if the query is completely contiguous at this index in the target + target_lower[target_index..].starts_with(&query_lower) + ) + { + matches[current_index] = matches_sequence_len + 1; + scores[current_index] = diag_score + score; + } else { + // We either have no score or the score is lower than the left score + // Match: reset to 0 + // Score: pick up from left hand side + matches[current_index] = NO_MATCH; + scores[current_index] = left_score; + } + } + } + + // Restore Positions (starting from bottom right of matrix) + let mut positions = Vec::new(); + + if query.len() != 0 && target.len() != 0 { + let mut query_index = query.len() - 1; + let mut target_index = target.len() - 1; + + loop { + let current_index = query_index * target.len() + target_index; + if matches[current_index] == NO_MATCH { + if target_index == 0 { + break; + } + target_index -= 1; // go left + } else { + positions.push(target_index); + + // go up and left + if query_index == 0 || target_index == 0 { + break; + } + query_index -= 1; + target_index -= 1; + } + } + + 
/// Scores a single query character against a single target character.
///
/// * `query` / `query_lower`: the query character and its case-folded form.
/// * `target_prev`: the character preceding `target_curr`, or `None` at the start of the target.
/// * `target_curr` / `target_curr_lower`: the target character and its case-folded form.
/// * `matches_sequence_len`: the number of consecutive characters matched right before this one.
///
/// Returns 0 if the characters don't match, otherwise a positive score made up of
/// the bonuses described inline.
fn compute_char_score(
    query: char,
    query_lower: char,
    target_prev: Option<char>,
    target_curr: char,
    target_curr_lower: char,
    matches_sequence_len: i32,
) -> i32 {
    let mut score = 0;

    if !consider_as_equal(query_lower, target_curr_lower) {
        return score; // no match of characters
    }

    // Character match bonus
    score += 1;

    // Consecutive match bonus
    if matches_sequence_len > 0 {
        score += matches_sequence_len * 5;
    }

    // Same case bonus
    if query == target_curr {
        score += 1;
    }

    if let Some(target_prev) = target_prev {
        // After separator bonus
        let separator_bonus = score_separator_at_pos(target_prev);
        if separator_bonus > 0 {
            score += separator_bonus;
        }
        // Inside word upper case bonus (camel case). We only give this bonus if we're not in a contiguous sequence.
        // For example:
        // NPE => NullPointerException = boost
        // HTTP => HTTP = not boost
        else if target_curr != target_curr_lower && matches_sequence_len == 0 {
            score += 2;
        }
    } else {
        // Start of word bonus
        score += 8;
    }

    score
}

/// Returns whether `a` and `b` should be treated as the same character.
///
/// Besides being identical, two characters are considered equal if both of them
/// are path separators (`/` or `\`), which ignores platform differences in paths.
fn consider_as_equal(a: char, b: char) -> bool {
    // The parentheses matter: `&&` binds tighter than `||`, and without them
    // any character would compare equal to a `/` in `a` or a `\` in `b`.
    a == b || (matches!(a, '/' | '\\') && matches!(b, '/' | '\\'))
}

/// Returns the bonus awarded to a match that directly follows the character `ch`,
/// or 0 if `ch` is not a separator.
fn score_separator_at_pos(ch: char) -> i32 {
    match ch {
        '/' | '\\' => 5,                                 // prefer path separators...
        '_' | '-' | '.' | ' ' | '\'' | '"' | ':' => 4, // ...over other separators
        _ => 0,
    }
}
/// Packs the first, the middle and the last byte of a 1 to 3 byte long input into one integer.
///
/// # Safety
///
/// `p` must be valid for reads of `k` bytes and `k` must be at least 1.
unsafe fn wyr3(p: *const u8, k: usize) -> u64 {
    // SAFETY: The offsets 0, k / 2 and k - 1 are all less than `k`, given `k >= 1`.
    unsafe {
        let first = p.read() as u64;
        let middle = p.add(k >> 1).read() as u64;
        let last = p.add(k - 1).read() as u64;
        (first << 16) | (middle << 8) | last
    }
}

/// Reads 4 bytes in native byte order, without any alignment requirement.
///
/// # Safety
///
/// `p` must be valid for reads of 4 bytes.
unsafe fn wyr4(p: *const u8) -> u64 {
    // SAFETY: Guaranteed by the caller.
    unsafe { p.cast::<u32>().read_unaligned() as u64 }
}

/// Reads 8 bytes in native byte order, without any alignment requirement.
///
/// # Safety
///
/// `p` must be valid for reads of 8 bytes.
unsafe fn wyr8(p: *const u8) -> u64 {
    // SAFETY: Guaranteed by the caller.
    unsafe { p.cast::<u64>().read_unaligned() }
}

// This is a weak mix function on its own. It may be worth considering
// replacing external uses of this function with a stronger one.
// On the other hand, it's very fast.
/// Multiplies `lhs` and `rhs` into a 128-bit product and XORs its two 64-bit halves.
pub fn wymix(lhs: u64, rhs: u64) -> u64 {
    let product = (lhs as u128) * (rhs as u128);
    let hi = (product >> 64) as u64;
    let lo = product as u64;
    hi ^ lo
}

// The venerable wyhash hash function. It's fast and has good statistical properties.
// It's in the public domain.
/// Hashes `data` with the given `seed`.
///
/// Multi-byte reads use the native byte order of the machine.
pub fn hash(mut seed: u64, data: &[u8]) -> u64 {
    const S0: u64 = 0xa0761d6478bd642f;
    const S1: u64 = 0xe7037ed1a0b428db;
    const S2: u64 = 0x8ebc6af09c88c6e3;
    const S3: u64 = 0x589965cc75374cc3;

    let len = data.len();
    let mut p = data.as_ptr();

    seed ^= S0;

    // SAFETY: Every read below stays within `data`:
    // * 1..=3 bytes: `wyr3` only reads offsets below `len`.
    // * 4..=16 bytes: the 4 byte reads start at 0, `mid`, `len - 4` and `len - 4 - mid`,
    //   where `mid + 4 <= len`.
    // * More than 16 bytes: `p` only advances while more than 16 (or 48) bytes remain,
    //   and the final two reads cover the last 16 bytes of `data`.
    let (a, b) = unsafe {
        match len {
            0 => (0, 0),
            1..=3 => (wyr3(p, len), 0),
            4..=16 => {
                let mid = (len >> 3) << 2;
                (
                    (wyr4(p) << 32) | wyr4(p.add(mid)),
                    (wyr4(p.add(len - 4)) << 32) | wyr4(p.add(len - 4 - mid)),
                )
            }
            _ => {
                let mut remaining = len;

                if remaining > 48 {
                    // Consume 48 bytes per round in three independent lanes.
                    let mut lane1 = seed;
                    let mut lane2 = seed;
                    loop {
                        seed = wymix(wyr8(p) ^ S1, wyr8(p.add(8)) ^ seed);
                        lane1 = wymix(wyr8(p.add(16)) ^ S2, wyr8(p.add(24)) ^ lane1);
                        lane2 = wymix(wyr8(p.add(32)) ^ S3, wyr8(p.add(40)) ^ lane2);
                        p = p.add(48);
                        remaining -= 48;
                        if remaining <= 48 {
                            break;
                        }
                    }
                    seed ^= lane1 ^ lane2;
                }

                while remaining > 16 {
                    seed = wymix(wyr8(p) ^ S1, wyr8(p.add(8)) ^ seed);
                    remaining -= 16;
                    p = p.add(16);
                }

                // The tail always consists of the last 16 bytes of the input,
                // even if that overlaps with bytes that were already consumed.
                let end = p.add(remaining);
                (wyr8(end.sub(16)), wyr8(end.sub(8)))
            }
        }
    };

    wymix(S1 ^ (len as u64), wymix(a ^ S1, b ^ seed))
}

/// Hashes the UTF-8 bytes of `s` with the given `seed`. See [`hash`].
pub fn hash_str(seed: u64, s: &str) -> u64 {
    hash(seed, s.as_bytes())
}
/// Appends `total_copies` copies of the character `ch` to the end of `dst`.
pub fn string_append_repeat(dst: &mut String, ch: char, total_copies: usize) {
    if total_copies == 0 {
        return;
    }

    // SAFETY: Only complete UTF-8 encodings of `ch` are ever appended to the
    // underlying bytes, which leaves `dst` valid UTF-8.
    let bytes = unsafe { dst.as_mut_vec() };

    if ch.is_ascii() {
        // A single byte per copy: this compiles down to `memset()`.
        let filled_len = bytes.len() + total_copies;
        bytes.resize(filled_len, ch as u8);
        return;
    }

    let mut encoded = [0; 4];
    let unit = ch.encode_utf8(&mut encoded).as_bytes();
    let base = bytes.len();
    let target = base + unit.len() * total_copies;

    bytes.reserve(target - base);
    bytes.extend_from_slice(unit);

    // Repeatedly append a copy of everything written so far (capped at what is still
    // missing). This doubles the padding with each step, so only a logarithmic
    // number of copy operations is needed.
    while bytes.len() != target {
        let written = bytes.len() - base;
        let missing = target - bytes.len();
        bytes.extend_from_within(base..base + written.min(missing));
    }
}
/// Inserts a copy of all elements of `src` into `dst`, starting at index `off`.
///
/// The elements of `dst` at and after `off` are shifted back by `src.len()`.
/// An `off` past the end of `dst` is clamped to `dst.len()`, in which case
/// `src` is simply appended.
pub fn vec_insert_at<T: Copy>(dst: &mut Vec<T>, off: usize, src: &[T]) {
    let off = off.min(dst.len());
    // Splicing into the empty range `off..off` removes nothing and inserts `src`.
    // The insertion is carried out when the returned iterator is dropped.
    drop(dst.splice(off..off, src.iter().copied()));
}
+ #[allow(static_mut_refs)] + unsafe { + if ENCODINGS.is_none() { + let mut encodings = Vec::new(); + + if let Ok(f) = init_if_needed() { + let mut n = 0; + loop { + let name = (f.ucnv_getAvailableName)(n); + if name.is_null() { + break; + } + encodings.push(DisplayableCString::from_ptr(name)); + n += 1; + } + } + + if encodings.is_empty() { + encodings.push(DisplayableCString::new(CString::new("UTF-8").unwrap())); + } + + ENCODINGS = Some(encodings); + } + ENCODINGS.as_ref().unwrap_unchecked().as_slice() + } +} + +pub struct Converter<'pivot> { + source: *mut icu_ffi::UConverter, + target: *mut icu_ffi::UConverter, + pivot_buffer: &'pivot mut [MaybeUninit], + pivot_source: *mut u16, + pivot_target: *mut u16, + reset: bool, +} + +impl Drop for Converter<'_> { + fn drop(&mut self) { + let f = assume_loaded(); + unsafe { (f.ucnv_close)(self.source) }; + unsafe { (f.ucnv_close)(self.target) }; + } +} + +impl<'pivot> Converter<'pivot> { + pub fn new( + pivot_buffer: &'pivot mut [MaybeUninit], + source_encoding: &str, + target_encoding: &str, + ) -> apperr::Result { + let f = init_if_needed()?; + + let source_encoding = Self::append_nul(source_encoding); + let target_encoding = Self::append_nul(target_encoding); + + let mut status = icu_ffi::U_ZERO_ERROR; + let source = unsafe { (f.ucnv_open)(source_encoding.as_ptr(), &mut status) }; + let target = unsafe { (f.ucnv_open)(target_encoding.as_ptr(), &mut status) }; + if status.is_failure() { + if !source.is_null() { + unsafe { (f.ucnv_close)(source) }; + } + if !target.is_null() { + unsafe { (f.ucnv_close)(target) }; + } + return Err(status.as_error()); + } + + let pivot_source = pivot_buffer.as_mut_ptr() as *mut u16; + let pivot_target = unsafe { pivot_source.add(pivot_buffer.len()) }; + + Ok(Self { + source, + target, + pivot_buffer, + pivot_source, + pivot_target, + reset: true, + }) + } + + fn append_nul(input: &str) -> String { + format!("{}\0", input) + } + + pub fn convert(&mut self, input: &[u8], output: &mut [u8]) 
-> apperr::Result<(usize, usize)> { + let f = assume_loaded(); + + let input_beg = input.as_ptr(); + let input_end = unsafe { input_beg.add(input.len()) }; + let mut input_ptr = input_beg; + + let output_beg = output.as_mut_ptr(); + let output_end = unsafe { output_beg.add(output.len()) }; + let mut output_ptr = output_beg; + + let pivot_beg = self.pivot_buffer.as_mut_ptr() as *mut u16; + let pivot_end = unsafe { pivot_beg.add(self.pivot_buffer.len()) }; + + let flush = input.is_empty(); + let mut status = icu_ffi::U_ZERO_ERROR; + + unsafe { + (f.ucnv_convertEx)( + /* target_cnv */ self.target, + /* source_cnv */ self.source, + /* target */ &mut output_ptr, + /* target_limit */ output_end, + /* source */ &mut input_ptr, + /* source_limit */ input_end, + /* pivot_start */ pivot_beg, + /* pivot_source */ &mut self.pivot_source, + /* pivot_target */ &mut self.pivot_target, + /* pivot_limit */ pivot_end, + /* reset */ self.reset, + /* flush */ flush, + /* status */ &mut status, + ); + } + + self.reset = false; + if status.is_failure() && status != icu_ffi::U_BUFFER_OVERFLOW_ERROR { + return Err(status.as_error()); + } + + let input_advance = unsafe { input_ptr.offset_from(input_beg) as usize }; + let output_advance = unsafe { output_ptr.offset_from(output_beg) as usize }; + Ok((input_advance, output_advance)) + } +} + +// In benchmarking, I found that the performance does not really change much by changing this value. +// I picked 64 because it seemed like a reasonable lower bound. +const CACHE_SIZE: usize = 64; + +// Caches a chunk of TextBuffer contents (UTF-8) in UTF-16 format. +struct Cache { + /// The translated text. Contains `len`-many valid items. + utf16: [u16; CACHE_SIZE], + /// For each character in `utf16` this stores the offset in the `TextBuffer`, + /// relative to the start offset stored in `native_beg`. + /// This has the same length as `utf16`. 
+ utf16_to_utf8_offsets: [u16; CACHE_SIZE], + /// `utf8_to_utf16_offsets[native_offset - native_beg]` will tell you which character + /// in `utf16` maps to the given `native_offset` in the underlying `TextBuffer`. + /// Contains `native_end - native_beg`-many valid items. + utf8_to_utf16_offsets: [u16; CACHE_SIZE], + + /// The number of valid items in `utf16`. + utf16_len: usize, + native_indexing_limit: usize, + + /// The range of UTF-8 text in the `TextBuffer` that this chunk covers. + utf8_range: Range, +} + +struct DoubleCache { + cache: [Cache; 2], + /// You can consider this a 1 bit index into `cache`. + mru: bool, +} + +// I initially did this properly with a PhantomData marker for the TextBuffer lifetime, +// but it was a pain so now I don't. Not a big deal - its only use is in a self-referential +// struct in TextBuffer which Rust can't deal with anyway. +pub struct Text(&'static mut icu_ffi::UText); + +impl Drop for Text { + fn drop(&mut self) { + let f = assume_loaded(); + unsafe { (f.utext_close)(self.0) }; + } +} + +impl Text { + pub unsafe fn new(tb: &TextBuffer) -> apperr::Result { + let f = init_if_needed()?; + + let mut status = icu_ffi::U_ZERO_ERROR; + let ptr = + unsafe { (f.utext_setup)(null_mut(), size_of::() as i32, &mut status) }; + if status.is_failure() { + return Err(status.as_error()); + } + + const FUNCS: icu_ffi::UTextFuncs = icu_ffi::UTextFuncs { + table_size: size_of::() as i32, + reserved1: 0, + reserved2: 0, + reserved3: 0, + clone: Some(utext_clone), + native_length: Some(utext_native_length), + access: Some(utext_access), + extract: None, + replace: None, + copy: None, + map_offset_to_native: Some(utext_map_offset_to_native), + map_native_index_to_utf16: Some(utext_map_native_index_to_utf16), + close: None, + spare1: None, + spare2: None, + spare3: None, + }; + + let ut = unsafe { &mut *ptr }; + ut.p_funcs = &FUNCS; + ut.context = tb as *const TextBuffer as *mut _; + + // ICU unfortunately expects a `UText` instance to have valid 
contents after construction. + utext_access(ut, 0, true); + + Ok(Self(ut)) + } +} + +fn text_buffer_from_utext<'a>(ut: &icu_ffi::UText) -> &'a TextBuffer { + unsafe { &*(ut.context as *const TextBuffer) } +} + +fn double_cache_from_utext<'a>(ut: &icu_ffi::UText) -> &'a mut DoubleCache { + unsafe { &mut *(ut.p_extra as *mut DoubleCache) } +} + +extern "C" fn utext_clone( + dest: *mut icu_ffi::UText, + src: &icu_ffi::UText, + deep: bool, + status: &mut icu_ffi::UErrorCode, +) -> *mut icu_ffi::UText { + if status.is_failure() { + return null_mut(); + } + + if deep { + *status = icu_ffi::U_UNSUPPORTED_ERROR; + return null_mut(); + } + + let f = assume_loaded(); + let ut_ptr = unsafe { (f.utext_setup)(dest, size_of::() as i32, status) }; + if status.is_failure() { + return null_mut(); + } + + unsafe { + let ut = &mut *ut_ptr; + let src_double_cache = double_cache_from_utext(src); + let dst_double_cache = double_cache_from_utext(ut); + let src_cache = &src_double_cache.cache[src_double_cache.mru as usize]; + let dst_cache = &mut dst_double_cache.cache[dst_double_cache.mru as usize]; + + ut.provider_properties = src.provider_properties; + ut.chunk_native_limit = src.chunk_native_limit; + ut.native_indexing_limit = src.native_indexing_limit; + ut.chunk_native_start = src.chunk_native_start; + ut.chunk_offset = src.chunk_offset; + ut.chunk_length = src.chunk_length; + ut.chunk_contents = dst_cache.utf16.as_ptr(); + ut.p_funcs = src.p_funcs; + ut.context = src.context; + + // I wonder if it would make sense to use a Cow here. But probably not. 
+ std::ptr::copy_nonoverlapping(src_cache, dst_cache, 1); + } + + ut_ptr +} + +extern "C" fn utext_native_length(ut: &mut icu_ffi::UText) -> i64 { + let tb = text_buffer_from_utext(ut); + tb.text_length() as i64 +} + +extern "C" fn utext_access(ut: &mut icu_ffi::UText, native_index: i64, forward: bool) -> bool { + let tb = text_buffer_from_utext(ut); + let mut index_contained = native_index; + + if !forward { + index_contained -= 1; + } + if index_contained < 0 || index_contained as usize >= tb.text_length() { + return false; + } + + let index_contained = index_contained as usize; + let native_index = native_index as usize; + + let double_cache = double_cache_from_utext(ut); + + for cache in &double_cache.cache { + if cache.utf8_range.contains(&index_contained) { + ut.chunk_contents = cache.utf16.as_ptr(); + ut.chunk_length = cache.utf16_len as i32; + ut.chunk_offset = + cache.utf8_to_utf16_offsets[native_index - cache.utf8_range.start] as i32; + ut.chunk_native_start = cache.utf8_range.start as i64; + ut.chunk_native_limit = cache.utf8_range.end as i64; + ut.native_indexing_limit = cache.native_indexing_limit as i32; + return true; + } + } + + // Turn the least recently used cache into the most recently used one. + double_cache.mru = !double_cache.mru; + let cache = &mut double_cache.cache[double_cache.mru as usize]; + + // In order to safely fit any UTF-8 character into our cache, + // we must assume the worst case of a 4-byte long encoding. + const UTF16_LEN_LIMIT: usize = CACHE_SIZE - 4; + let utf8_len_limit; + let native_start; + + if forward { + utf8_len_limit = (tb.text_length() - native_index).min(UTF16_LEN_LIMIT); + native_start = native_index; + } else { + // The worst case ratio for UTF-8 to UTF-16 is 1:1, when the text is ASCII. + // This allows us to safely subtract the UTF-16 buffer size + // and assume that whatever we read as UTF-8 will fit. + // TODO: Test what happens if you have lots of invalid UTF-8 text blow up to U+FFFD. 
+ utf8_len_limit = native_index.min(UTF16_LEN_LIMIT); + + // Since simply subtracting an offset may end up in the middle of a codepoint sequence, + // we must align the offset to the next codepoint boundary. + // Here we skip trail bytes until we find a lead. + let mut beg = native_index - utf8_len_limit; + let chunk = tb.read_forward(beg); + for &c in chunk { + if c & 0b1100_0000 != 0b1000_0000 { + break; + } + beg += 1; + } + + native_start = beg; + } + + // Translate the given range from UTF-8 to UTF-16. + // NOTE: This code makes the assumption that the `native_index` is always + // at UTF-8 codepoint boundaries which technically isn't guaranteed. + let mut utf16_len = 0; + let mut utf8_len = 0; + let mut ascii_len = 0; + 'outer: loop { + let initial_utf8_len = utf8_len; + let chunk = tb.read_forward(native_start + utf8_len); + if chunk.is_empty() { + break; + } + + let mut it = Utf8Chars::new(chunk, 0); + + // If we've only seen ASCII so far we can fast-pass the UTF-16 translation, + // because we can just widen from u8 -> u16. + if utf16_len == ascii_len { + let haystack = &chunk[..utf8_len_limit - ascii_len]; + // When it comes to performance, and the search space is small (which it is here), + // it's always a good idea to keep the loops small and tight... + let len = haystack + .iter() + .position(|&c| c >= 0x80) + .unwrap_or(haystack.len()); + + // ...In this case it allows the compiler to vectorize this loop and double + // the performance. Luckily, llvm doesn't unroll the loop, which is great, + // because `len` will always be a relatively small number. 
+ for &c in &chunk[..len] { + unsafe { + *cache.utf16.get_unchecked_mut(ascii_len) = c as u16; + *cache.utf16_to_utf8_offsets.get_unchecked_mut(ascii_len) = ascii_len as u16; + *cache.utf8_to_utf16_offsets.get_unchecked_mut(ascii_len) = ascii_len as u16; + } + ascii_len += 1; + } + + utf16_len += len; + utf8_len += len; + it.seek(len); + if ascii_len >= UTF16_LEN_LIMIT { + break; + } + } + + // TODO: This loop is the slow part of our uregex search. May be worth optimizing. + loop { + let Some(c) = it.next() else { + break; + }; + + // Thanks to our `if utf16_len >= utf16_limit` check, + // we can safely assume that this will fit. + unsafe { + let utf8_len_beg = utf8_len; + let utf8_len_end = initial_utf8_len + it.offset(); + + while utf8_len < utf8_len_end { + *cache.utf8_to_utf16_offsets.get_unchecked_mut(utf8_len) = utf16_len as u16; + utf8_len += 1; + } + + if c <= '\u{FFFF}' { + *cache.utf16.get_unchecked_mut(utf16_len) = c as u16; + *cache.utf16_to_utf8_offsets.get_unchecked_mut(utf16_len) = utf8_len_beg as u16; + utf16_len += 1; + } else { + let c = c as u32 - 0x1_0000; + *cache.utf16.get_unchecked_mut(utf16_len) = (c >> 10) as u16 | 0xD800; + *cache.utf16_to_utf8_offsets.get_unchecked_mut(utf16_len) = utf8_len_beg as u16; + utf16_len += 1; + *cache.utf16.get_unchecked_mut(utf16_len) = (c & 0x3FF) as u16 | 0xDC00; + *cache.utf16_to_utf8_offsets.get_unchecked_mut(utf16_len) = utf8_len_beg as u16; + utf16_len += 1; + } + } + + if utf16_len >= UTF16_LEN_LIMIT || utf8_len >= utf8_len_limit { + break 'outer; + } + } + } + + // Allow for looking up past-the-end indices via + // `utext_map_offset_to_native` and `utext_map_native_index_to_utf16`. 
+ cache.utf16_to_utf8_offsets[utf16_len] = utf8_len as u16; + cache.utf8_to_utf16_offsets[utf8_len] = utf16_len as u16; + + let native_limit = native_index + utf8_len; + cache.utf16_len = utf16_len; + cache.utf8_range = native_start..native_limit; + + ut.chunk_contents = cache.utf16.as_ptr(); + ut.chunk_length = cache.utf16_len as i32; + ut.chunk_offset = if forward { 0 } else { cache.utf16_len as i32 }; + ut.chunk_native_start = native_start as i64; + ut.chunk_native_limit = native_limit as i64; + // If the entire UTF-8 chunk is ASCII, we can tell ICU that it doesn't need to call + // utext_map_offset_to_native. For some reason, uregex calls that function *a lot*, + // literally half the CPU time is spent on it. + ut.native_indexing_limit = ascii_len as i32; + true +} + +#[inline(never)] +fn foo() {} + +extern "C" fn utext_map_offset_to_native(ut: &icu_ffi::UText) -> i64 { + debug_assert!(ut.chunk_offset >= 0 && ut.chunk_offset <= ut.chunk_length); + let double_cache = double_cache_from_utext(ut); + let cache = &double_cache.cache[double_cache.mru as usize]; + let off_rel = cache.utf16_to_utf8_offsets[ut.chunk_offset as usize]; + let off_abs = cache.utf8_range.start + off_rel as usize; + off_abs as i64 +} + +extern "C" fn utext_map_native_index_to_utf16(ut: &icu_ffi::UText, native_index: i64) -> i32 { + debug_assert!(native_index >= ut.chunk_native_start && native_index <= ut.chunk_native_limit); + let double_cache = double_cache_from_utext(ut); + let cache = &double_cache.cache[double_cache.mru as usize]; + let off_rel = cache.utf8_to_utf16_offsets[(native_index - ut.chunk_native_start) as usize]; + off_rel as i32 +} + +// Same reason here for not using a PhantomData marker as with `Text`. 
+pub struct Regex(&'static mut icu_ffi::URegularExpression); + +impl Drop for Regex { + fn drop(&mut self) { + let f = assume_loaded(); + unsafe { (f.uregex_close)(self.0) }; + } +} + +impl Regex { + pub const CASE_INSENSITIVE: i32 = icu_ffi::UREGEX_CASE_INSENSITIVE; + pub const MULTILINE: i32 = icu_ffi::UREGEX_MULTILINE; + pub const LITERAL: i32 = icu_ffi::UREGEX_LITERAL; + + pub unsafe fn new(pattern: &str, flags: i32, text: &Text) -> apperr::Result { + let f = init_if_needed()?; + unsafe { + let utf16: Vec = pattern.encode_utf16().collect(); + let mut status = icu_ffi::U_ZERO_ERROR; + + let ptr = (f.uregex_open)( + utf16.as_ptr(), + utf16.len() as i32, + icu_ffi::UREGEX_MULTILINE | icu_ffi::UREGEX_ERROR_ON_UNKNOWN_ESCAPES | flags, + None, + &mut status, + ); + // ICU describes the time unit as being dependent on CPU performance + // and "typically [in] the order of milliseconds", but this claim seems + // highly outdated. On my CPU from 2021, a limit of 4096 equals roughly 600ms. + (f.uregex_setTimeLimit)(ptr, 4096, &mut status); + (f.uregex_setStackLimit)(ptr, 4 * 1024 * 1024, &mut status); + (f.uregex_setUText)(ptr, text.0 as *const _ as *mut _, &mut status); + if status.is_failure() { + return Err(status.as_error()); + } + + Ok(Self(&mut *ptr)) + } + } + + pub fn reset(&mut self, index: usize) { + let f = assume_loaded(); + let mut status = icu_ffi::U_ZERO_ERROR; + unsafe { (f.uregex_reset64)(self.0, index as i64, &mut status) }; + } +} + +impl Iterator for Regex { + type Item = Range; + + fn next(&mut self) -> Option { + let f = assume_loaded(); + + let mut status = icu_ffi::U_ZERO_ERROR; + let ok = unsafe { (f.uregex_findNext)(self.0, &mut status) }; + if !ok { + return None; + } + + let start = unsafe { (f.uregex_start64)(self.0, 0, &mut status) }; + let end = unsafe { (f.uregex_end64)(self.0, 0, &mut status) }; + if status.is_failure() { + return None; + } + + let start = start.max(0); + let end = end.max(start); + Some(start as usize..end as usize) + } 
+} + +static mut ROOT_COLLATOR: Option<*mut icu_ffi::UCollator> = None; + +pub fn compare_strings(a: &[u8], b: &[u8]) -> cmp::Ordering { + // OnceCell for people that want to put it into a static. + #[allow(static_mut_refs)] + let coll = unsafe { + if ROOT_COLLATOR.is_none() { + ROOT_COLLATOR = Some(if let Ok(f) = init_if_needed() { + let mut status = icu_ffi::U_ZERO_ERROR; + (f.ucol_open)(c"".as_ptr(), &mut status) + } else { + null_mut() + }) + } + ROOT_COLLATOR.unwrap_unchecked() + }; + + if coll.is_null() { + a.cmp(b) + } else { + let f = assume_loaded(); + let mut status = icu_ffi::U_ZERO_ERROR; + let res = unsafe { + (f.ucol_strcollUTF8)( + coll, + a.as_ptr(), + a.len() as i32, + b.as_ptr(), + b.len() as i32, + &mut status, + ) + }; + + match res { + icu_ffi::UCollationResult::UCOL_EQUAL => cmp::Ordering::Equal, + icu_ffi::UCollationResult::UCOL_GREATER => cmp::Ordering::Greater, + icu_ffi::UCollationResult::UCOL_LESS => cmp::Ordering::Less, + } + } +} + +static mut ROOT_CASEMAP: Option<*mut icu_ffi::UCaseMap> = None; + +pub fn fold_case(input: &str) -> String { + // OnceCell for people that want to put it into a static. + #[allow(static_mut_refs)] + let casemap = unsafe { + if ROOT_CASEMAP.is_none() { + ROOT_CASEMAP = Some(if let Ok(f) = init_if_needed() { + let mut status = icu_ffi::U_ZERO_ERROR; + (f.ucasemap_open)(null(), 0, &mut status) + } else { + null_mut() + }) + } + ROOT_CASEMAP.unwrap_unchecked() + }; + + if !casemap.is_null() { + let f = assume_loaded(); + let mut status = icu_ffi::U_ZERO_ERROR; + let mut output = Vec::new(); + let mut output_len; + + // First, guess the output length: + // TODO: What's a good heuristic here? 
+ { + output.reserve_exact(input.len() + 16); + let output = output.spare_capacity_mut(); + output_len = unsafe { + (f.ucasemap_utf8FoldCase)( + casemap, + output.as_mut_ptr() as *mut _, + output.len() as i32, + input.as_ptr() as *const _, + input.len() as i32, + &mut status, + ) + }; + } + + // If that failed to fit, retry with the correct length. + if status == icu_ffi::U_BUFFER_OVERFLOW_ERROR && output_len > 0 { + output.reserve_exact(output_len as usize); + let output = output.spare_capacity_mut(); + output_len = unsafe { + (f.ucasemap_utf8FoldCase)( + casemap, + output.as_mut_ptr() as *mut _, + output.len() as i32, + input.as_ptr() as *const _, + input.len() as i32, + &mut status, + ) + }; + } + + if status.is_success() && output_len > 0 { + unsafe { + output.set_len(output_len as usize); + } + return unsafe { String::from_utf8_unchecked(output) }; + } + } + + input.to_ascii_lowercase() +} + +#[allow(non_snake_case)] +struct LibraryFunctions { + ucnv_getAvailableName: icu_ffi::ucnv_getAvailableName, + ucnv_open: icu_ffi::ucnv_open, + ucnv_close: icu_ffi::ucnv_close, + ucnv_convertEx: icu_ffi::ucnv_convertEx, + + ucasemap_open: icu_ffi::ucasemap_open, + ucasemap_utf8FoldCase: icu_ffi::ucasemap_utf8FoldCase, + + ucol_open: icu_ffi::ucol_open, + ucol_strcollUTF8: icu_ffi::ucol_strcollUTF8, + + utext_setup: icu_ffi::utext_setup, + utext_close: icu_ffi::utext_close, + + uregex_open: icu_ffi::uregex_open, + uregex_close: icu_ffi::uregex_close, + uregex_setStackLimit: icu_ffi::uregex_setStackLimit, + uregex_setTimeLimit: icu_ffi::uregex_setTimeLimit, + uregex_setUText: icu_ffi::uregex_setUText, + uregex_reset64: icu_ffi::uregex_reset64, + uregex_findNext: icu_ffi::uregex_findNext, + uregex_start64: icu_ffi::uregex_start64, + uregex_end64: icu_ffi::uregex_end64, +} + +// SAFETY: +const LIBRARY_FUNCTIONS_NAMES: [&CStr; 19] = [ + c"ucnv_getAvailableName", + c"ucnv_open", + c"ucnv_close", + c"ucnv_convertEx", + // + c"ucasemap_open", + c"ucasemap_utf8FoldCase", + // + 
// SAFETY: `init_if_needed` resolves these symbols in order and writes the resulting
// pointers into `LibraryFunctions` positionally, one field after the other.
// The names MUST therefore be listed in exactly the same order as the fields of
// `LibraryFunctions` are declared. A mismatch silently binds a field to the wrong
// function (e.g. `uregex_setStackLimit` <-> `uregex_setTimeLimit`, which share a
// signature and so cannot be caught by the type system).
const LIBRARY_FUNCTIONS_NAMES: [&CStr; 19] = [
    c"ucnv_getAvailableName",
    c"ucnv_open",
    c"ucnv_close",
    c"ucnv_convertEx",
    //
    c"ucasemap_open",
    c"ucasemap_utf8FoldCase",
    //
    c"ucol_open",
    c"ucol_strcollUTF8",
    //
    c"utext_setup",
    c"utext_close",
    //
    c"uregex_open",
    c"uregex_close",
    c"uregex_setStackLimit",
    c"uregex_setTimeLimit",
    c"uregex_setUText",
    c"uregex_reset64",
    c"uregex_findNext",
    c"uregex_start64",
    c"uregex_end64",
];
+ const _: () = assert!( + mem::size_of::() + == mem::size_of::() * LIBRARY_FUNCTIONS_NAMES.len() + ); + + let mut funcs = MaybeUninit::::uninit(); + let mut ptr = funcs.as_mut_ptr() as *mut TransparentFunction; + + for name in LIBRARY_FUNCTIONS_NAMES { + let Ok(func) = sys::get_proc_address(icu, name) else { + return; + }; + ptr.write(func); + ptr = ptr.add(1); + } + + LIBRARY_FUNCTIONS = LibraryFunctionsState::Loaded(funcs.assume_init()); + } + } + + unsafe { + if matches!(&LIBRARY_FUNCTIONS, LibraryFunctionsState::Uninitialized) { + load(); + } + } + + match unsafe { &LIBRARY_FUNCTIONS } { + LibraryFunctionsState::Loaded(f) => Ok(f), + _ => Err(apperr::APP_ICU_MISSING), + } +} + +#[allow(static_mut_refs)] +fn assume_loaded() -> &'static LibraryFunctions { + match unsafe { &LIBRARY_FUNCTIONS } { + LibraryFunctionsState::Loaded(f) => f, + _ => unreachable!(), + } +} + +mod icu_ffi { + #![allow(non_camel_case_types)] + + use crate::apperr; + use std::ffi::c_char; + + #[derive(Copy, Clone, Eq, PartialEq)] + #[repr(transparent)] + pub struct UErrorCode(std::os::raw::c_int); + + impl UErrorCode { + pub fn is_success(&self) -> bool { + self.0 <= 0 + } + + pub fn is_failure(&self) -> bool { + self.0 > 0 + } + + pub fn as_error(&self) -> apperr::Error { + debug_assert!(self.0 > 0); + apperr::Error::new_icu(self.0 as u32) + } + } + + pub const U_ZERO_ERROR: UErrorCode = UErrorCode(0); + pub const U_BUFFER_OVERFLOW_ERROR: UErrorCode = UErrorCode(15); + pub const U_UNSUPPORTED_ERROR: UErrorCode = UErrorCode(16); + + pub struct UConverter; + + pub type ucnv_getAvailableName = unsafe extern "C" fn(n: i32) -> *mut c_char; + + pub type ucnv_open = + unsafe extern "C" fn(converter_name: *const u8, status: &mut UErrorCode) -> *mut UConverter; + + pub type ucnv_close = unsafe extern "C" fn(converter: *mut UConverter); + + pub type ucnv_convertEx = unsafe extern "C" fn( + target_cnv: *mut UConverter, + source_cnv: *mut UConverter, + target: *mut *mut u8, + target_limit: *const u8, 
+ source: *mut *const u8, + source_limit: *const u8, + pivot_start: *mut u16, + pivot_source: *mut *mut u16, + pivot_target: *mut *mut u16, + pivot_limit: *const u16, + reset: bool, + flush: bool, + status: &mut UErrorCode, + ); + + pub struct UCaseMap; + + pub type ucasemap_open = unsafe extern "C" fn( + locale: *const c_char, + options: u32, + status: &mut UErrorCode, + ) -> *mut UCaseMap; + + pub type ucasemap_utf8FoldCase = unsafe extern "C" fn( + csm: *const UCaseMap, + dest: *mut c_char, + dest_capacity: i32, + src: *const c_char, + src_length: i32, + status: &mut UErrorCode, + ) -> i32; + + #[repr(C)] + pub enum UCollationResult { + UCOL_EQUAL = 0, + UCOL_GREATER = 1, + UCOL_LESS = -1, + } + + #[repr(C)] + pub struct UCollator; + + pub type ucol_open = + unsafe extern "C" fn(loc: *const c_char, status: &mut UErrorCode) -> *mut UCollator; + + pub type ucol_strcollUTF8 = unsafe extern "C" fn( + coll: *mut UCollator, + source: *const u8, + source_length: i32, + target: *const u8, + target_length: i32, + status: &mut UErrorCode, + ) -> UCollationResult; + + // UText callback functions + pub type UTextClone = unsafe extern "C" fn( + dest: *mut UText, + src: &UText, + deep: bool, + status: &mut UErrorCode, + ) -> *mut UText; + pub type UTextNativeLength = unsafe extern "C" fn(ut: &mut UText) -> i64; + pub type UTextAccess = + unsafe extern "C" fn(ut: &mut UText, native_index: i64, forward: bool) -> bool; + pub type UTextExtract = unsafe extern "C" fn( + ut: &mut UText, + native_start: i64, + native_limit: i64, + dest: *mut u16, + dest_capacity: i32, + status: &mut UErrorCode, + ) -> i32; + pub type UTextReplace = unsafe extern "C" fn( + ut: &mut UText, + native_start: i64, + native_limit: i64, + replacement_text: *const u16, + replacement_length: i32, + status: &mut UErrorCode, + ) -> i32; + pub type UTextCopy = unsafe extern "C" fn( + ut: &mut UText, + native_start: i64, + native_limit: i64, + native_dest: i64, + move_text: bool, + status: &mut UErrorCode, + ); + 
pub type UTextMapOffsetToNative = unsafe extern "C" fn(ut: &UText) -> i64; + pub type UTextMapNativeIndexToUTF16 = + unsafe extern "C" fn(ut: &UText, native_index: i64) -> i32; + pub type UTextClose = unsafe extern "C" fn(ut: &mut UText); + + #[repr(C)] + pub struct UTextFuncs { + pub table_size: i32, + pub reserved1: i32, + pub reserved2: i32, + pub reserved3: i32, + pub clone: Option, + pub native_length: Option, + pub access: Option, + pub extract: Option, + pub replace: Option, + pub copy: Option, + pub map_offset_to_native: Option, + pub map_native_index_to_utf16: Option, + pub close: Option, + pub spare1: Option, + pub spare2: Option, + pub spare3: Option, + } + + #[repr(C)] + pub struct UText { + pub magic: u32, + pub flags: i32, + pub provider_properties: i32, + pub size_of_struct: i32, + pub chunk_native_limit: i64, + pub extra_size: i32, + pub native_indexing_limit: i32, + pub chunk_native_start: i64, + pub chunk_offset: i32, + pub chunk_length: i32, + pub chunk_contents: *const u16, + pub p_funcs: &'static UTextFuncs, + pub p_extra: *mut std::ffi::c_void, + pub context: *mut std::ffi::c_void, + pub p: *mut std::ffi::c_void, + pub q: *mut std::ffi::c_void, + pub r: *mut std::ffi::c_void, + pub priv_p: *mut std::ffi::c_void, + pub a: i64, + pub b: i32, + pub c: i32, + pub priv_a: i64, + pub priv_b: i32, + pub priv_c: i32, + } + + pub const UTEXT_MAGIC: u32 = 0x345ad82c; + pub const UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE: i32 = 1; + pub const UTEXT_PROVIDER_STABLE_CHUNKS: i32 = 2; + pub const UTEXT_PROVIDER_WRITABLE: i32 = 3; + pub const UTEXT_PROVIDER_HAS_META_DATA: i32 = 4; + pub const UTEXT_PROVIDER_OWNS_TEXT: i32 = 5; + + pub type utext_setup = unsafe extern "C" fn( + ut: *mut UText, + extra_space: i32, + status: &mut UErrorCode, + ) -> *mut UText; + pub type utext_close = unsafe extern "C" fn(ut: *mut UText) -> *mut UText; + + #[repr(C)] + pub struct UParseError { + pub line: i32, + pub offset: i32, + pub pre_context: [u16; 16], + pub post_context: [u16; 
16], + } + + #[repr(C)] + pub struct URegularExpression; + + pub const UREGEX_UNIX_LINES: i32 = 1; + pub const UREGEX_CASE_INSENSITIVE: i32 = 2; + pub const UREGEX_COMMENTS: i32 = 4; + pub const UREGEX_MULTILINE: i32 = 8; + pub const UREGEX_LITERAL: i32 = 16; + pub const UREGEX_DOTALL: i32 = 32; + pub const UREGEX_UWORD: i32 = 256; + pub const UREGEX_ERROR_ON_UNKNOWN_ESCAPES: i32 = 512; + + pub type uregex_open = unsafe extern "C" fn( + pattern: *const u16, + pattern_length: i32, + flags: i32, + pe: Option<&mut UParseError>, + status: &mut UErrorCode, + ) -> *mut URegularExpression; + pub type uregex_close = unsafe extern "C" fn(regexp: *mut URegularExpression); + pub type uregex_setTimeLimit = + unsafe extern "C" fn(regexp: *mut URegularExpression, limit: i32, status: &mut UErrorCode); + pub type uregex_setStackLimit = + unsafe extern "C" fn(regexp: *mut URegularExpression, limit: i32, status: &mut UErrorCode); + pub type uregex_setUText = unsafe extern "C" fn( + regexp: *mut URegularExpression, + text: *mut UText, + status: &mut UErrorCode, + ); + pub type uregex_reset64 = + unsafe extern "C" fn(regexp: *mut URegularExpression, index: i64, status: &mut UErrorCode); + pub type uregex_findNext = + unsafe extern "C" fn(regexp: *mut URegularExpression, status: &mut UErrorCode) -> bool; + pub type uregex_start64 = unsafe extern "C" fn( + regexp: *mut URegularExpression, + group_num: i32, + status: &mut UErrorCode, + ) -> i64; + pub type uregex_end64 = unsafe extern "C" fn( + regexp: *mut URegularExpression, + group_num: i32, + status: &mut UErrorCode, + ) -> i64; +} diff --git a/src/input.rs b/src/input.rs new file mode 100644 index 0000000..ad7cb3a --- /dev/null +++ b/src/input.rs @@ -0,0 +1,488 @@ +use crate::helpers::{Point, Size}; +use crate::vt; + +// TODO: Is this a good idea? I did it to allow typing `kbmod::CTRL | vk::A`. +// The reason it's an awkard u32 and not a struct is to hopefully make ABIs easier later. 
/// A key code and its modifier flags, packed into a single `u32`.
///
/// The lower 24 bits carry the key, the upper 8 bits carry the modifiers.
#[repr(transparent)]
#[derive(Clone, Copy, PartialEq, Eq)]
pub struct InputKey(u32);

impl InputKey {
    /// Bits that hold the key code.
    const KEY_MASK: u32 = 0x00FFFFFF;
    /// Bits that hold the modifier flags.
    const MODIFIER_MASK: u32 = 0xFF000000;

    /// Wraps a raw value (key code, optionally with modifier bits).
    pub const fn new(v: u32) -> Self {
        InputKey(v)
    }

    /// Returns the raw packed value.
    pub const fn value(&self) -> u32 {
        self.0
    }

    /// Returns the key with all modifier bits removed.
    pub const fn key(&self) -> InputKey {
        Self(self.0 & Self::KEY_MASK)
    }

    /// Returns only the modifier bits.
    pub const fn modifiers(&self) -> InputKeyMod {
        InputKeyMod(self.0 & Self::MODIFIER_MASK)
    }

    /// Returns true if at least one bit of `modifier` is set on this key.
    pub const fn modifiers_contains(&self, modifier: InputKeyMod) -> bool {
        let shared = self.0 & modifier.0;
        shared != 0
    }

    /// Returns a copy of this key with `modifiers` added.
    pub const fn with_modifiers(&self, modifiers: InputKeyMod) -> InputKey {
        Self(modifiers.0 | self.0)
    }
}

/// Modifier flags (the upper bits of an [`InputKey`]).
#[repr(transparent)]
#[derive(Clone, Copy, PartialEq, Eq)]
pub struct InputKeyMod(u32);

impl InputKeyMod {
    const fn new(v: u32) -> Self {
        InputKeyMod(v)
    }

    /// Returns true if at least one bit of `modifier` is set in `self`.
    pub const fn contains(&self, modifier: InputKeyMod) -> bool {
        let shared = self.0 & modifier.0;
        shared != 0
    }
}

// `key | modifier` and `modifier | key` both produce the combined `InputKey`.
impl std::ops::BitOr<InputKeyMod> for InputKey {
    type Output = InputKey;

    fn bitor(self, rhs: InputKeyMod) -> InputKey {
        self.with_modifiers(rhs)
    }
}

impl std::ops::BitOr<InputKey> for InputKeyMod {
    type Output = InputKey;

    fn bitor(self, rhs: InputKey) -> InputKey {
        rhs.with_modifiers(self)
    }
}

impl std::ops::BitOrAssign for InputKeyMod {
    fn bitor_assign(&mut self, rhs: Self) {
        *self = InputKeyMod(self.0 | rhs.0);
    }
}
+pub mod vk { + use super::InputKey; + + pub const NULL: InputKey = InputKey::new(0x00); + pub const BACK: InputKey = InputKey::new(0x08); + pub const TAB: InputKey = InputKey::new(0x09); + pub const RETURN: InputKey = InputKey::new(0x0D); + pub const ESCAPE: InputKey = InputKey::new(0x1B); + pub const SPACE: InputKey = InputKey::new(0x20); + pub const PRIOR: InputKey = InputKey::new(0x21); + pub const NEXT: InputKey = InputKey::new(0x22); + + pub const END: InputKey = InputKey::new(0x23); + pub const HOME: InputKey = InputKey::new(0x24); + + pub const LEFT: InputKey = InputKey::new(0x25); + pub const UP: InputKey = InputKey::new(0x26); + pub const RIGHT: InputKey = InputKey::new(0x27); + pub const DOWN: InputKey = InputKey::new(0x28); + + pub const INSERT: InputKey = InputKey::new(0x2D); + pub const DELETE: InputKey = InputKey::new(0x2E); + + pub const A: InputKey = InputKey::new('A' as u32); + pub const B: InputKey = InputKey::new('B' as u32); + pub const C: InputKey = InputKey::new('C' as u32); + pub const D: InputKey = InputKey::new('D' as u32); + pub const E: InputKey = InputKey::new('E' as u32); + pub const F: InputKey = InputKey::new('F' as u32); + pub const G: InputKey = InputKey::new('G' as u32); + pub const H: InputKey = InputKey::new('H' as u32); + pub const I: InputKey = InputKey::new('I' as u32); + pub const J: InputKey = InputKey::new('J' as u32); + pub const K: InputKey = InputKey::new('K' as u32); + pub const L: InputKey = InputKey::new('L' as u32); + pub const M: InputKey = InputKey::new('M' as u32); + pub const N: InputKey = InputKey::new('N' as u32); + pub const O: InputKey = InputKey::new('O' as u32); + pub const P: InputKey = InputKey::new('P' as u32); + pub const Q: InputKey = InputKey::new('Q' as u32); + pub const R: InputKey = InputKey::new('R' as u32); + pub const S: InputKey = InputKey::new('S' as u32); + pub const T: InputKey = InputKey::new('T' as u32); + pub const U: InputKey = InputKey::new('U' as u32); + pub const V: InputKey = 
InputKey::new('V' as u32); + pub const W: InputKey = InputKey::new('W' as u32); + pub const X: InputKey = InputKey::new('X' as u32); + pub const Y: InputKey = InputKey::new('Y' as u32); + pub const Z: InputKey = InputKey::new('Z' as u32); + + pub const NUMPAD0: InputKey = InputKey::new(0x60); + pub const NUMPAD1: InputKey = InputKey::new(0x61); + pub const NUMPAD2: InputKey = InputKey::new(0x62); + pub const NUMPAD3: InputKey = InputKey::new(0x63); + pub const NUMPAD4: InputKey = InputKey::new(0x64); + pub const NUMPAD5: InputKey = InputKey::new(0x65); + pub const NUMPAD6: InputKey = InputKey::new(0x66); + pub const NUMPAD7: InputKey = InputKey::new(0x67); + pub const NUMPAD8: InputKey = InputKey::new(0x68); + pub const NUMPAD9: InputKey = InputKey::new(0x69); + pub const MULTIPLY: InputKey = InputKey::new(0x6A); + pub const ADD: InputKey = InputKey::new(0x6B); + pub const SEPARATOR: InputKey = InputKey::new(0x6C); + pub const SUBTRACT: InputKey = InputKey::new(0x6D); + pub const DECIMAL: InputKey = InputKey::new(0x6E); + pub const DIVIDE: InputKey = InputKey::new(0x6F); + + pub const F1: InputKey = InputKey::new(0x70); + pub const F2: InputKey = InputKey::new(0x71); + pub const F3: InputKey = InputKey::new(0x72); + pub const F4: InputKey = InputKey::new(0x73); + pub const F5: InputKey = InputKey::new(0x74); + pub const F6: InputKey = InputKey::new(0x75); + pub const F7: InputKey = InputKey::new(0x76); + pub const F8: InputKey = InputKey::new(0x77); + pub const F9: InputKey = InputKey::new(0x78); + pub const F10: InputKey = InputKey::new(0x79); + pub const F11: InputKey = InputKey::new(0x7A); + pub const F12: InputKey = InputKey::new(0x7B); + pub const F13: InputKey = InputKey::new(0x7C); + pub const F14: InputKey = InputKey::new(0x7D); + pub const F15: InputKey = InputKey::new(0x7E); + pub const F16: InputKey = InputKey::new(0x7F); + pub const F17: InputKey = InputKey::new(0x80); + pub const F18: InputKey = InputKey::new(0x81); + pub const F19: InputKey = 
InputKey::new(0x82); + pub const F20: InputKey = InputKey::new(0x83); + pub const F21: InputKey = InputKey::new(0x84); + pub const F22: InputKey = InputKey::new(0x85); + pub const F23: InputKey = InputKey::new(0x86); + pub const F24: InputKey = InputKey::new(0x87); +} + +pub mod kbmod { + use super::InputKeyMod; + + pub const NONE: InputKeyMod = InputKeyMod::new(0x00000000); + pub const CTRL: InputKeyMod = InputKeyMod::new(0x01000000); + pub const ALT: InputKeyMod = InputKeyMod::new(0x02000000); + pub const SHIFT: InputKeyMod = InputKeyMod::new(0x04000000); + + pub const CTRL_ALT: InputKeyMod = InputKeyMod::new(0x03000000); + pub const CTRL_SHIFT: InputKeyMod = InputKeyMod::new(0x05000000); + pub const ALT_SHIFT: InputKeyMod = InputKeyMod::new(0x06000000); + pub const CTRL_ALT_SHIFT: InputKeyMod = InputKeyMod::new(0x07000000); +} + +#[derive(Clone, Copy)] +pub struct InputText<'a> { + pub text: &'a str, + pub bracketed: bool, +} + +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Default)] +pub enum InputMouseState { + #[default] + None, + + // These 3 carry their state between frames. + Left, + Middle, + Right, + + // These 2 get reset to None on the next frame. + Release, + Scroll, +} + +#[derive(Clone, Copy)] +pub struct InputMouse { + pub state: InputMouseState, + pub modifiers: InputKeyMod, + pub position: Point, + pub scroll: Point, +} + +pub enum Input<'input> { + Resize(Size), + Text(InputText<'input>), + Keyboard(InputKey), + Mouse(InputMouse), +} + +pub struct Parser { + want: bool, + buf: [u8; 3], + len: usize, +} + +impl Parser { + pub fn new() -> Self { + Self { + want: false, + buf: [0; 3], + len: 0, + } + } + + /// Turns VT sequences into keyboard, mouse, etc., inputs. 
+ pub fn parse<'parser, 'vt, 'input>( + &'parser mut self, + stream: vt::Stream<'vt, 'input>, + ) -> Stream<'parser, 'vt, 'input> { + Stream { + parser: self, + stream, + } + } +} + +pub struct Stream<'parser, 'vt, 'input> { + parser: &'parser mut Parser, + stream: vt::Stream<'vt, 'input>, +} + +impl Stream<'_, '_, '_> { + /// Parses the next input action from the previously given input. + /// + /// Can't implement Iterator, because this is a "lending iterator". + pub fn next(&mut self) -> Option { + if self.parser.want { + return self.parse_x10_mouse_coordinates(); + } + + let token = self.stream.next()?; + + match token { + vt::Token::Text(text) => Some(Input::Text(InputText { + text, + bracketed: false, + })), + vt::Token::Ctrl(ch) => match ch { + '\0' | '\t' | '\r' => Some(Input::Keyboard(InputKey::new(ch as u32))), + ..='\x1a' => { + // Shift control code to A-Z + let key = ch as u32 | 0x40; + Some(Input::Keyboard(kbmod::CTRL | InputKey::new(key))) + } + '\x7f' => Some(Input::Keyboard(vk::BACK)), + _ => None, + }, + vt::Token::Esc(ch) => { + match ch { + '\0' => Some(Input::Keyboard(vk::ESCAPE)), + ' '..='~' => { + let ch = ch as u32; + let key = ch & !0x20; // Shift a-z to A-Z + let modifiers = if (ch & 0x20) != 0 { + kbmod::ALT + } else { + kbmod::ALT_SHIFT + }; + Some(Input::Keyboard(modifiers | InputKey::new(key))) + } + _ => None, + } + } + vt::Token::SS3(ch) => { + if ('P'..='S').contains(&ch) { + let key = vk::F1.value() + ch as u32 - 'P' as u32; + Some(Input::Keyboard(InputKey::new(key))) + } else { + None + } + } + vt::Token::Csi(csi) => { + match csi.final_byte { + 'A'..='H' => { + const LUT: [u8; 8] = [ + vk::UP.value() as u8, // A + vk::DOWN.value() as u8, // B + vk::RIGHT.value() as u8, // C + vk::LEFT.value() as u8, // D + 0, // E + vk::END.value() as u8, // F + 0, // G + vk::HOME.value() as u8, // H + ]; + let vk = LUT[csi.final_byte as usize - 'A' as usize]; + if vk != 0 { + return Some(Input::Keyboard( + InputKey::new(vk as u32) | 
Self::parse_modifiers(csi), + )); + } + None + } + 'Z' => return Some(Input::Keyboard(kbmod::SHIFT | vk::TAB)), + '~' => { + const LUT: [u8; 35] = [ + 0, + vk::HOME.value() as u8, // 1 + vk::INSERT.value() as u8, // 2 + vk::DELETE.value() as u8, // 3 + vk::END.value() as u8, // 4 + vk::PRIOR.value() as u8, // 5 + vk::NEXT.value() as u8, // 6 + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + vk::F5.value() as u8, // 15 + 0, + vk::F6.value() as u8, // 17 + vk::F7.value() as u8, // 18 + vk::F8.value() as u8, // 19 + vk::F9.value() as u8, // 20 + vk::F10.value() as u8, // 21 + 0, + vk::F11.value() as u8, // 23 + vk::F12.value() as u8, // 24 + vk::F13.value() as u8, // 25 + vk::F14.value() as u8, // 26 + 0, + vk::F15.value() as u8, // 28 + vk::F16.value() as u8, // 29 + 0, + vk::F17.value() as u8, // 31 + vk::F18.value() as u8, // 32 + vk::F19.value() as u8, // 33 + vk::F20.value() as u8, // 34 + ]; + let p0 = csi.params[0]; + if p0 >= 0 && p0 <= LUT.len() as i32 { + let vk = LUT[p0 as usize]; + if vk != 0 { + return Some(Input::Keyboard( + InputKey::new(vk as u32) | Self::parse_modifiers(csi), + )); + } + } + None + } + 'm' | 'M' if csi.private_byte == '<' => { + let btn = csi.params[0]; + let mut mouse = InputMouse { + state: InputMouseState::None, + modifiers: kbmod::NONE, + position: Point::default(), + scroll: Point::default(), + }; + + mouse.state = InputMouseState::None; + if (btn & 0x40) != 0 { + mouse.state = InputMouseState::Scroll; + mouse.scroll.y += if (btn & 0x01) != 0 { 3 } else { -3 }; + } else if csi.final_byte == 'M' { + const STATES: [InputMouseState; 4] = [ + InputMouseState::Left, + InputMouseState::Middle, + InputMouseState::Right, + InputMouseState::None, + ]; + mouse.state = STATES[(btn as usize) & 0x03]; + } + + mouse.modifiers = kbmod::NONE; + mouse.modifiers |= if (btn & 0x04) != 0 { + kbmod::SHIFT + } else { + kbmod::NONE + }; + mouse.modifiers |= if (btn & 0x08) != 0 { + kbmod::ALT + } else { + kbmod::NONE + }; + mouse.modifiers |= if (btn & 0x10f) 
!= 0 { + kbmod::CTRL + } else { + kbmod::NONE + }; + + mouse.position.x = csi.params[1] - 1; + mouse.position.y = csi.params[2] - 1; + Some(Input::Mouse(mouse)) + } + 'M' if csi.param_count == 0 => { + self.parser.want = true; + None + } + 't' if csi.params[0] == 8 => { + // Window Size + let width = csi.params[2].clamp(1, 32767); + let height = csi.params[1].clamp(1, 32767); + Some(Input::Resize(Size { width, height })) + } + _ => None, + } + } + _ => None, + } + } + + /// Implements the X10 mouse protocol via `CSI M CbCxCy`. + /// + /// You want to send numeric mouse coordinates. + /// You have CSI sequences with numeric parameters. + /// So, of course you put the coordinates as shifted ASCII characters after + /// the end of the sequence. Limited coordinate range and complicated parsing! + /// This is so puzzling to me. The existence of this function makes me unhappy. + #[cold] + fn parse_x10_mouse_coordinates(&mut self) -> Option { + self.parser.len += self.stream.read(&mut self.parser.buf[self.parser.len..]); + if self.parser.len < 3 { + return None; + } + + let button = self.parser.buf[0] & 0b11; + let modifier = self.parser.buf[0] & 0b11100; + let x = self.parser.buf[1] as i32 - 0x21; + let y = self.parser.buf[2] as i32 - 0x21; + let action = match button { + 0 => InputMouseState::Left, + 1 => InputMouseState::Middle, + 2 => InputMouseState::Right, + _ => InputMouseState::None, + }; + let modifiers = match modifier { + 4 => kbmod::SHIFT, + 8 => kbmod::ALT, + 16 => kbmod::CTRL, + _ => kbmod::NONE, + }; + + self.parser.want = false; + self.parser.len = 0; + + Some(Input::Mouse(InputMouse { + state: action, + modifiers, + position: Point { x, y }, + scroll: Point::default(), + })) + } + + fn parse_modifiers(csi: &vt::Csi) -> InputKeyMod { + let mut modifiers = kbmod::NONE; + let p1 = (csi.params[1] - 1).max(0); + if (p1 & 0x01) != 0 { + modifiers |= kbmod::SHIFT; + } + if (p1 & 0x02) != 0 { + modifiers |= kbmod::ALT; + } + if (p1 & 0x04) != 0 { + modifiers |= 
kbmod::CTRL; + } + modifiers + } +} diff --git a/src/loc.rs b/src/loc.rs new file mode 100644 index 0000000..72a0925 --- /dev/null +++ b/src/loc.rs @@ -0,0 +1,663 @@ +use crate::sys; + +#[derive(Clone, Copy, PartialEq, Eq)] +pub enum LocId { + Ctrl, + Alt, + Shift, + + // File menu + File, + FileSave, + FileSaveAs, + FileExit, + + // Edit menu + Edit, + EditUndo, + EditRedo, + EditCut, + EditCopy, + EditPaste, + EditFind, + EditReplace, + + // View menu + View, + ViewWordWrap, + + // Help menu + Help, + HelpAbout, + + // Exit dialog + UnsavedChangesDialogTitle, + UnsavedChangesDialogDescription, + UnsavedChangesDialogYes, + UnsavedChangesDialogNo, + UnsavedChangesDialogCancel, + + // About dialog + AboutDialogTitle, + AboutDialogDescription, + AboutDialogVersion, + + SearchLabel, + SearchClose, + SearchMatchCase, + SearchWholeWord, + SearchUseRegex, + + EncodingReopen, + EncodingConvert, + + IndentationTabs, + IndentationSpaces, + + SaveAsDialogTitle, + SaveAsDialogFilenameLabel, + + Count, +} + +#[allow(non_camel_case_types)] +#[derive(Clone, Copy, PartialEq, Eq)] +enum LangId { + // Base language. It's always the first one. + en, + + // Other languages. Sorted alphabetically. 
+ de, + es, + fr, + it, + ja, + ko, + pt_br, + ru, + zh_hans, + zh_hant, + + Count, +} + +#[rustfmt::skip] +const S_LANG_LUT: [[&str; LangId::Count as usize]; LocId::Count as usize] = [ + // Ctrl + [ + /* en */ "Ctrl", + /* de */ "Strg", + /* es */ "Ctrl", + /* fr */ "Ctrl", + /* it */ "Ctrl", + /* ja */ "Ctrl", + /* ko */ "Ctrl", + /* pt_br */ "Ctrl", + /* ru */ "Ctrl", + /* zh_hans */ "Ctrl", + /* zh_hant */ "Ctrl", + ], + // Alt + [ + /* en */ "Alt", + /* de */ "Alt", + /* es */ "Alt", + /* fr */ "Alt", + /* it */ "Alt", + /* ja */ "Alt", + /* ko */ "Alt", + /* pt_br */ "Alt", + /* ru */ "Alt", + /* zh_hans */ "Alt", + /* zh_hant */ "Alt", + ], + // Shift + [ + /* en */ "Shift", + /* de */ "Umschalt", + /* es */ "Mayús", + /* fr */ "Maj", + /* it */ "Maiusc", + /* ja */ "Shift", + /* ko */ "Shift", + /* pt_br */ "Shift", + /* ru */ "Shift", + /* zh_hans */ "Shift", + /* zh_hant */ "Shift", + ], + + // File + [ + /* en */ "File", + /* de */ "Datei", + /* es */ "Archivo", + /* fr */ "Fichier", + /* it */ "File", + /* ja */ "ファイル", + /* ko */ "파일", + /* pt_br */ "Arquivo", + /* ru */ "Файл", + /* zh_hans */ "文件", + /* zh_hant */ "檔案", + ], + // FileSave + [ + /* en */ "Save", + /* de */ "Speichern", + /* es */ "Guardar", + /* fr */ "Enregistrer", + /* it */ "Salva", + /* ja */ "保存", + /* ko */ "저장", + /* pt_br */ "Salvar", + /* ru */ "Сохранить", + /* zh_hans */ "保存", + /* zh_hant */ "儲存", + ], + // FileSaveAs + // NOTE: Exact same translation as SaveAsDialogTitle, and both should be kept in sync. 
+ [ + /* en */ "Save As…", + /* de */ "Speichern unter…", + /* es */ "Guardar como…", + /* fr */ "Enregistrer sous…", + /* it */ "Salva come…", + /* ja */ "名前を付けて保存…", + /* ko */ "다른 이름으로 저장…", + /* pt_br */ "Salvar como…", + /* ru */ "Сохранить как…", + /* zh_hans */ "另存为…", + /* zh_hant */ "另存新檔…", + ], + // FileExit + [ + /* en */ "Exit", + /* de */ "Beenden", + /* es */ "Salir", + /* fr */ "Quitter", + /* it */ "Esci", + /* ja */ "終了", + /* ko */ "종료", + /* pt_br */ "Sair", + /* ru */ "Выход", + /* zh_hans */ "退出", + /* zh_hant */ "退出", + ], + + // Edit + [ + /* en */ "Edit", + /* de */ "Bearbeiten", + /* es */ "Editar", + /* fr */ "Éditer", + /* it */ "Modifica", + /* ja */ "編集", + /* ko */ "편집", + /* pt_br */ "Editar", + /* ru */ "Правка", + /* zh_hans */ "编辑", + /* zh_hant */ "編輯", + ], + // EditUndo + [ + /* en */ "Undo", + /* de */ "Rückgängig", + /* es */ "Deshacer", + /* fr */ "Annuler", + /* it */ "Annulla", + /* ja */ "元に戻す", + /* ko */ "실행 취소", + /* pt_br */ "Desfazer", + /* ru */ "Отменить", + /* zh_hans */ "撤销", + /* zh_hant */ "復原", + ], + // EditRedo + [ + /* en */ "Redo", + /* de */ "Wiederholen", + /* es */ "Rehacer", + /* fr */ "Rétablir", + /* it */ "Ripeti", + /* ja */ "やり直し", + /* ko */ "다시 실행", + /* pt_br */ "Refazer", + /* ru */ "Повторить", + /* zh_hans */ "重做", + /* zh_hant */ "重做", + ], + // EditCut + [ + /* en */ "Cut", + /* de */ "Ausschneiden", + /* es */ "Cortar", + /* fr */ "Couper", + /* it */ "Taglia", + /* ja */ "切り取り", + /* ko */ "잘라내기", + /* pt_br */ "Cortar", + /* ru */ "Вырезать", + /* zh_hans */ "剪切", + /* zh_hant */ "剪下", + ], + // EditCopy + [ + /* en */ "Copy", + /* de */ "Kopieren", + /* es */ "Copiar", + /* fr */ "Copier", + /* it */ "Copia", + /* ja */ "コピー", + /* ko */ "복사", + /* pt_br */ "Copiar", + /* ru */ "Копировать", + /* zh_hans */ "复制", + /* zh_hant */ "複製", + ], + // EditPaste + [ + /* en */ "Paste", + /* de */ "Einfügen", + /* es */ "Pegar", + /* fr */ "Coller", + /* it */ "Incolla", + /* ja */ "貼り付け", + /* 
ko */ "붙여넣기", + /* pt_br */ "Colar", + /* ru */ "Вставить", + /* zh_hans */ "粘贴", + /* zh_hant */ "貼上", + ], + // EditFind + [ + /* en */ "Find", + /* de */ "Suchen", + /* es */ "Buscar", + /* fr */ "Rechercher", + /* it */ "Trova", + /* ja */ "検索", + /* ko */ "찾기", + /* pt_br */ "Encontrar", + /* ru */ "Найти", + /* zh_hans */ "查找", + /* zh_hant */ "尋找", + ], + // EditReplace + [ + /* en */ "Replace", + /* de */ "Ersetzen", + /* es */ "Reemplazar", + /* fr */ "Remplacer", + /* it */ "Sostituisci", + /* ja */ "置換", + /* ko */ "바꾸기", + /* pt_br */ "Substituir", + /* ru */ "Заменить", + /* zh_hans */ "替换", + /* zh_hant */ "取代", + ], + + // View + [ + /* en */ "View", + /* de */ "Ansicht", + /* es */ "Ver", + /* fr */ "Affichage", + /* it */ "Visualizza", + /* ja */ "表示", + /* ko */ "보기", + /* pt_br */ "Exibir", + /* ru */ "Вид", + /* zh_hans */ "视图", + /* zh_hant */ "檢視", + ], + // ViewWordWrap + [ + /* en */ "Word Wrap", + /* de */ "Zeilenumbruch", + /* es */ "Ajuste de línea", + /* fr */ "Retour à la ligne", + /* it */ "A capo automatico", + /* ja */ "折り返し", + /* ko */ "자동 줄 바꿈", + /* pt_br */ "Quebra de linha", + /* ru */ "Перенос слов", + /* zh_hans */ "自动换行", + /* zh_hant */ "自動換行", + ], + + // Help + [ + /* en */ "Help", + /* de */ "Hilfe", + /* es */ "Ayuda", + /* fr */ "Aide", + /* it */ "Aiuto", + /* ja */ "ヘルプ", + /* ko */ "도움말", + /* pt_br */ "Ajuda", + /* ru */ "Помощь", + /* zh_hans */ "帮助", + /* zh_hant */ "幫助", + ], + // HelpAbout + [ + /* en */ "About", + /* de */ "Über", + /* es */ "Acerca de", + /* fr */ "À propos", + /* it */ "Informazioni", + /* ja */ "情報", + /* ko */ "정보", + /* pt_br */ "Sobre", + /* ru */ "О программе", + /* zh_hans */ "关于", + /* zh_hant */ "關於", + ], + + // UnsavedChangesDialogTitle + [ + /* en */ "Unsaved Changes", + /* de */ "Ungespeicherte Änderungen", + /* es */ "Cambios sin guardar", + /* fr */ "Modifications non enregistrées", + /* it */ "Modifiche non salvate", + /* ja */ "未保存の変更", + /* ko */ "저장되지 않은 변경 사항", + /* pt_br 
*/ "Alterações não salvas", + /* ru */ "Несохраненные изменения", + /* zh_hans */ "未保存的更改", + /* zh_hant */ "未儲存的變更", + ], + // UnsavedChangesDialogDescription + [ + /* en */ "Do you want to save the changes you made?", + /* de */ "Möchten Sie die vorgenommenen Änderungen speichern?", + /* es */ "¿Desea guardar los cambios realizados?", + /* fr */ "Voulez-vous enregistrer les modifications apportées?", + /* it */ "Vuoi salvare le modifiche apportate?", + /* ja */ "変更内容を保存しますか?", + /* ko */ "변경한 내용을 저장하시겠습니까?", + /* pt_br */ "Deseja salvar as alterações feitas?", + /* ru */ "Вы хотите сохранить внесённые изменения?", + /* zh_hans */ "您要保存所做的更改吗?", + /* zh_hant */ "您要保存所做的變更嗎?", + ], + // UnsavedChangesDialogYes + [ + /* en */ "Save", + /* de */ "Speichern", + /* es */ "Guardar", + /* fr */ "Enregistrer", + /* it */ "Salva", + /* ja */ "保存", + /* ko */ "저장", + /* pt_br */ "Salvar", + /* ru */ "Сохранить", + /* zh_hans */ "保存", + /* zh_hant */ "儲存", + ], + // UnsavedChangesDialogNo + [ + /* en */ "Don't Save", + /* de */ "Nicht speichern", + /* es */ "No guardar", + /* fr */ "Ne pas enregistrer", + /* it */ "Non salvare", + /* ja */ "保存しない", + /* ko */ "저장 안 함", + /* pt_br */ "Não salvar", + /* ru */ "Не сохранять", + /* zh_hans */ "不保存", + /* zh_hant */ "不儲存", + ], + // UnsavedChangesDialogCancel + [ + /* en */ "Cancel", + /* de */ "Abbrechen", + /* es */ "Cancelar", + /* fr */ "Annuler", + /* it */ "Annulla", + /* ja */ "キャンセル", + /* ko */ "취소", + /* pt_br */ "Cancelar", + /* ru */ "Отмена", + /* zh_hans */ "取消", + /* zh_hant */ "取消", + ], + + // AboutDialogTitle + [ + /* en */ "About", + /* de */ "Über", + /* es */ "Acerca de", + /* fr */ "À propos", + /* it */ "Informazioni", + /* ja */ "情報", + /* ko */ "정보", + /* pt_br */ "Sobre", + /* ru */ "О программе", + /* zh_hans */ "关于", + /* zh_hant */ "關於", + ], + // AboutDialogDescription + [ + /* en */ "Grug's favorite editor", + /* de */ "Grugs Lieblingseditor", + /* es */ "El editor favorito de Grug", + /* fr */ 
"L'éditeur préféré de Grug", + /* it */ "L'editor preferito di Grug", + /* ja */ "Grugのお気に入りエディタ", + /* ko */ "Grug이 가장 좋아하는 편집기", + /* pt_br */ "O editor favorito do Grug", + /* ru */ "Любимый редактор Груга", + /* zh_hans */ "Grug最喜欢的编辑器", + /* zh_hant */ "Grug最喜歡的編輯器", + ], + // AboutDialogVersion + [ + /* en */ "Version: ", + /* de */ "Version: ", + /* es */ "Versión: ", + /* fr */ "Version : ", + /* it */ "Versione: ", + /* ja */ "バージョン: ", + /* ko */ "버전: ", + /* pt_br */ "Versão: ", + /* ru */ "Версия: ", + /* zh_hans */ "版本:", + /* zh_hant */ "版本:", + ], + + // SearchLabel + [ + /* en */ "Find:", + /* de */ "Suchen:", + /* es */ "Buscar:", + /* fr */ "Rechercher:", + /* it */ "Trova:", + /* ja */ "検索:", + /* ko */ "찾기:", + /* pt_br */ "Encontrar:", + /* ru */ "Найти:", + /* zh_hans */ "查找:", + /* zh_hant */ "尋找:", + ], + // SearchClose + [ + /* en */ "Close", + /* de */ "Schließen", + /* es */ "Cerrar", + /* fr */ "Fermer", + /* it */ "Chiudi", + /* ja */ "閉じる", + /* ko */ "닫기", + /* pt_br */ "Fechar", + /* ru */ "Закрыть", + /* zh_hans */ "关闭", + /* zh_hant */ "關閉", + ], + // SearchMatchCase + [ + /* en */ "Match Case", + /* de */ "Groß/Klein", + /* es */ "May/Min", + /* fr */ "Casse", + /* it */ "Maius/minus", + /* ja */ "大/小文字", + /* ko */ "대소문자", + /* pt_br */ "Maius/minus", + /* ru */ "Регистр", + /* zh_hans */ "区分大小写", + /* zh_hant */ "區分大小寫", + ], + // SearchWholeWord + [ + /* en */ "Whole Word", + /* de */ "Ganzes Wort", + /* es */ "Palabra", + /* fr */ "Mot entier", + /* it */ "Parola", + /* ja */ "単語単位", + /* ko */ "전체 단어", + /* pt_br */ "Palavra", + /* ru */ "Слово", + /* zh_hans */ "全字匹配", + /* zh_hant */ "全字匹配", + ], + // SearchUseRegex + [ + /* en */ "Use Regex", + /* de */ "RegEx", + /* es */ "RegEx", + /* fr */ "RegEx", + /* it */ "RegEx", + /* ja */ "正規表現", + /* ko */ "정규식", + /* pt_br */ "RegEx", + /* ru */ "RegEx", + /* zh_hans */ "正则", + /* zh_hant */ "正則", + ], + + // EncodingReopen + [ + /* en */ "Reopen with encoding", + /* de */ "Mit 
Kodierung erneut öffnen", + /* es */ "Reabrir con codificación", + /* fr */ "Rouvrir avec un encodage différent", + /* it */ "Riapri con codifica", + /* ja */ "エンコーディングで再度開く", + /* ko */ "인코딩으로 다시 열기", + /* pt_br */ "Reabrir com codificação", + /* ru */ "Открыть снова с кодировкой", + /* zh_hans */ "使用编码重新打开", + /* zh_hant */ "使用編碼重新打開", + ], + // EncodingConvert + [ + /* en */ "Convert to encoding", + /* de */ "In Kodierung konvertieren", + /* es */ "Convertir a otra codificación", + /* fr */ "Convertir en encodage", + /* it */ "Converti in codifica", + /* ja */ "エンコーディングに変換", + /* ko */ "인코딩으로 변환", + /* pt_br */ "Converter para codificação", + /* ru */ "Преобразовать в кодировку", + /* zh_hans */ "转换为编码", + /* zh_hant */ "轉換為編碼", + ], + + // IndentationTabs + [ + /* en */ "Tabs", + /* de */ "Tabs", + /* es */ "Tabulaciones", + /* fr */ "Tabulations", + /* it */ "Tabulazioni", + /* ja */ "タブ", + /* ko */ "탭", + /* pt_br */ "Tabulações", + /* ru */ "Табы", + /* zh_hans */ "制表符", + /* zh_hant */ "製表符", + ], + // IndentationSpaces + [ + /* en */ "Spaces", + /* de */ "Leerzeichen", + /* es */ "Espacios", + /* fr */ "Espaces", + /* it */ "Spazi", + /* ja */ "スペース", + /* ko */ "공백", + /* pt_br */ "Espaços", + /* ru */ "Пробелы", + /* zh_hans */ "空格", + /* zh_hant */ "空格", + ], + + // SaveAsDialogTitle + // NOTE: Exact same translation as FileSaveAs, and both should be kept in sync. 
+ [ + /* en */ "Save As…", + /* de */ "Speichern unter…", + /* es */ "Guardar como…", + /* fr */ "Enregistrer sous…", + /* it */ "Salva come…", + /* ja */ "名前を付けて保存…", + /* ko */ "다른 이름으로 저장…", + /* pt_br */ "Salvar como…", + /* ru */ "Сохранить как…", + /* zh_hans */ "另存为…", + /* zh_hant */ "另存新檔…", + ], + // SaveAsDialogFilenameLabel + [ + /* en */ "Filename:", + /* de */ "Dateiname:", + /* es */ "Nombre de archivo:", + /* fr */ "Nom de fichier :", + /* it */ "Nome del file:", + /* ja */ "ファイル名:", + /* ko */ "파일 이름:", + /* pt_br */ "Nome do arquivo:", + /* ru */ "Имя файла:", + /* zh_hans */ "文件名:", + /* zh_hant */ "檔案名稱:", + ], +]; + +static mut S_LANG: LangId = LangId::en; + +pub fn init() { + let langs = sys::preferred_languages(); + let mut lang = LangId::en; + + for l in langs { + lang = match l.as_str() { + "en" => LangId::en, + "de" => LangId::de, + "es" => LangId::es, + "fr" => LangId::fr, + "it" => LangId::it, + "ja" => LangId::ja, + "ko" => LangId::ko, + "pt-br" => LangId::pt_br, + "ru" => LangId::ru, + "zh-hant" => LangId::zh_hant, + "zh" => LangId::zh_hans, + _ => continue, + }; + break; + } + + unsafe { + S_LANG = lang; + } +} + +pub fn loc(id: LocId) -> &'static str { + S_LANG_LUT[id as usize][unsafe { S_LANG as usize }] +} diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..3bf88f5 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,1067 @@ +// TODO: +// * Backspace at an indented line start should unindent by 1. +// * ...same for Shift+Tab. +// * Backspace undo grouping. +// * Goto line +// * When focus changes, all scrollareas in the path should scroll the item into view +// * Windows should have a yedit titlebar +// * yori colors +// * grid layout +// * Checking if the focus-down path is identical to focus-up, so that drags stay in their path and clicks only click. +// * Output diffing / compression +// * BUG: When word-wrap is enabled, insert a tab into the last word of the first of a wrapped line. 
+// The entire word will wrap with the tab in the middle, as if the tab is not whitespace. +// * COMMIT_MESSAGE, --rulers, .editorconfig +// -------------------------------------------------- +// * This would allow us to skip computing the stats.visual_lines, +// because we could simply scroll by the number of logical lines. +// * Word wrapping could not be part of ucd, but rather get computed +// lazily by the render function. This trivializes everything else. +// * Replace +// * Multi-Cursor +// * Scrolling by dragging the track/thumb +// * For the focus path we can use the tree depth to O(1) check if the path contains the focus. + +#![allow( + dead_code, + clippy::needless_if, + clippy::uninit_assumed_init, + clippy::missing_transmute_annotations +)] + +use buffer::RcTextBuffer; +use helpers::{COORD_TYPE_SAFE_MAX, DisplayablePathBuf, Point}; +use input::{kbmod, vk}; + +use crate::framebuffer::IndexedColor; +use crate::helpers::{Rect, Size}; +use crate::loc::{LocId, loc}; +use crate::tui::*; +use crate::vt::Token; +use std::fs::File; +use std::mem; +use std::path::{Path, PathBuf}; +use std::{cmp, process}; + +#[cfg(feature = "debug-latency")] +use std::fmt::Write; + +mod apperr; +mod buffer; +mod framebuffer; +mod fuzzy; +mod helpers; +mod icu; +mod input; +mod loc; +mod memchr; +mod sys; +mod trust_me_bro; +mod tui; +mod ucd; +mod ucd_gen; +mod utf8; +mod vt; + +struct RestoreModes; + +impl Drop for RestoreModes { + fn drop(&mut self) { + // Same as in the beginning but in the reverse order. + // It also includes DECSCUSR 0 to reset the cursor style and DECTCEM to show the cursor. 
+ sys::write_stdout("\x1b[?1002;1006;2004l\x1b[?1049l\x1b[0 q\x1b[?25h"); + } +} + +#[derive(Clone, Copy, PartialEq, Eq)] +enum StateSearch { + Hidden, + Focus, + Shown, +} + +#[derive(Clone, Copy, PartialEq, Eq)] +enum StateSave { + None, + Save, + SaveAs, +} + +#[derive(Clone, Copy, PartialEq, Eq)] +enum StateEncodingChange { + None, + Convert, + Reopen, +} + +struct State { + path: Option<PathBuf>, + buffer: RcTextBuffer, + + // A ring buffer of the last 10 errors. + error_log: [String; 10], + error_log_index: usize, + error_log_count: usize, + + wants_save: StateSave, + save_dir: DisplayablePathBuf, + save_entries: Option<Vec<DisplayablePathBuf>>, + save_filename: String, // This could be PathBuf, if `tui` would expose its TextBuffer for editline. + + wants_search: StateSearch, + search_needle: String, + search_options: buffer::SearchOptions, + + wants_encoding_change: StateEncodingChange, + wants_about: bool, + wants_exit: bool, + exit: bool, +} + +impl State { + fn new() -> apperr::Result<Self> { + let path = std::env::args_os() + .nth(1) + .and_then(|p| if p == "-" { None } else { Some(p) }) + .map(PathBuf::from); + let save_filename = path + .as_ref() + .and_then(|p| p.file_name()) + .unwrap_or_default() + .to_string_lossy() + .into_owned(); + + let mut buffer = RcTextBuffer::new(false)?; + buffer.set_margin_enabled(true); + buffer.set_ruler(if save_filename == "COMMIT_EDITMSG" { + Some(72) + } else { + None + }); + + Ok(Self { + path, + buffer, + + error_log: [const { String::new() }; 10], + error_log_index: 0, + error_log_count: 0, + + wants_save: StateSave::None, + save_dir: DisplayablePathBuf::new(std::env::current_dir()?), + save_entries: None, + save_filename, + + wants_search: StateSearch::Hidden, + search_needle: String::new(), + search_options: buffer::SearchOptions::default(), + + wants_encoding_change: StateEncodingChange::None, + wants_about: false, + wants_exit: false, + exit: false, + }) + } +} + +impl State { + fn update_path(&mut self, path: Option<PathBuf>) { + self.path = path; + } +} +
+fn main() -> process::ExitCode { + if cfg!(debug_assertions) { + let hook = std::panic::take_hook(); + std::panic::set_hook(Box::new(move |info| { + drop(RestoreModes); + sys::deinit(); + hook(info); + })); + } + + let code = match run() { + Ok(()) => process::ExitCode::SUCCESS, + Err(err) => { + let mut msg = err.message(); + msg.push_str("\r\n"); + sys::write_stdout(&msg); + process::ExitCode::FAILURE + } + }; + sys::deinit(); + code +} + +fn run() -> apperr::Result<()> { + sys::init()?; + + let mut state = State::new()?; + let mut vt_parser = vt::Parser::new(); + let mut input_parser = input::Parser::new(); + let mut tui = Tui::new(); + + if let Some(mut file) = sys::open_stdin_if_redirected() { + state.buffer.read_file(&mut file, None)?; + state.buffer.mark_as_dirty(); + } else if let Some(path) = &state.path { + match file_open(path) { + Ok(mut file) => { + state.buffer.read_file(&mut file, None)?; + } + Err(apperr::APP_FILE_NOT_FOUND) => {} + Err(err) => return Err(err), + } + } + + query_color_palette(&mut tui, &mut vt_parser); + + // 1049: Alternative Screen Buffer + // I put the ASB switch in the beginning, just in case the terminal performs + // some additional state tracking beyond the modes we enable/disable. + // 1002: Cell Motion Mouse Tracking + // 1006: SGR Mouse Mode + // 2004: Bracketed Paste Mode + let _restore_modes = RestoreModes; + sys::write_stdout("\x1b[?1049h\x1b[?1002;1006;2004h"); + sys::inject_window_size_into_stdin(); + + loop { + let Some(input) = sys::read_stdin(vt_parser.read_timeout()) else { + break; + }; + + #[cfg(feature = "debug-latency")] + let time_beg = std::time::Instant::now(); + #[cfg(feature = "debug-latency")] + let mut passes = 0usize; + + // TODO: lifetime + let vt_iter = vt_parser.parse(trust_me_bro::this_lifetime_change_is_totally_safe(&input)); + let mut input_iter = input_parser.parse(vt_iter); + + // Process all input. 
+ while let Some(ui_input) = input_iter.next() { + let mut ctx = tui.create_context(Some(ui_input)); + draw(&mut ctx, &mut state); + + #[cfg(feature = "debug-latency")] + { + passes += 1; + } + } + + // Continue rendering until the layout has settled. + // This can take >1 frame, if the input focus is tossed between different controls. + while tui.needs_settling() { + let mut ctx = tui.create_context(None); + draw(&mut ctx, &mut state); + + #[cfg(feature = "debug-layout")] + state.buffer.debug_replace_everything(&tui.debug_layout()); + + #[cfg(feature = "debug-latency")] + { + passes += 1; + } + } + + if state.exit { + break; + } + + #[cfg(feature = "debug-latency")] + { + let mut output = tui.render(); + + // Print the number of passes and latency in the top right corner. + let time_end = std::time::Instant::now(); + let status = time_end - time_beg; + let status = format!("{}x {:.3}μs", passes, status.as_nanos() as f64 / 1000.0); + + // "μs" is 3 bytes and 2 columns. + let cols = status.len() as i32 - 3 + 2; + let x = tui.size().width - cols; + + // To avoid moving the cursor, push and pop it onto the VT cursor stack. 
+ _ = write!(output, "\x1b7\x1b[1;{}H{}\x1b8", x + 1, status); + + sys::write_stdout(&output); + } + #[cfg(not(feature = "debug-latency"))] + { + let output = tui.render(); + sys::write_stdout(&output); + } + } + + Ok(()) +} + +fn draw(ctx: &mut Context, state: &mut State) { + draw_menubar(ctx, state); + if state.wants_search != StateSearch::Hidden { + draw_search(ctx, state); + } + draw_editor(ctx, state); + draw_statusbar(ctx, state); + + if state.wants_save != StateSave::None + && (state.wants_save == StateSave::SaveAs || state.path.is_none()) + { + draw_dialog_saveas(ctx, state); + } + + if state.wants_save == StateSave::Save { + draw_handle_save(state, None); + } + + if state.wants_encoding_change != StateEncodingChange::None { + draw_dialog_encoding_change(ctx, state); + } + + // If the user presses "Save" on the exit dialog we'll possibly show a SaveAs dialog. + // The exit dialog should then get hidden. + if state.wants_exit && state.wants_save == StateSave::None { + draw_handle_wants_exit(ctx, state); + } + + if state.wants_about { + draw_dialog_about(ctx, state); + } + + if state.error_log_count != 0 { + draw_error_log(ctx, state); + } + + // Shortcuts that are not handled as part of the textarea.
+ if ctx.consume_shortcut(kbmod::CTRL | vk::S) { + state.wants_save = StateSave::Save; + } + if ctx.consume_shortcut(kbmod::CTRL_SHIFT | vk::S) { + state.wants_save = StateSave::SaveAs; + } + if ctx.consume_shortcut(kbmod::CTRL | vk::Q) { + state.wants_exit = true; + } + if ctx.consume_shortcut(kbmod::CTRL | vk::F) { + state.wants_search = StateSearch::Focus; + } +} + +fn draw_menubar(ctx: &mut Context, state: &mut State) { + ctx.menubar_begin(); + ctx.attr_background_rgba(ctx.indexed(IndexedColor::BrightBlue)); + { + if ctx.menubar_menu_begin(loc(LocId::File), 'F') { + draw_menu_file(ctx, state); + } + if ctx.menubar_menu_begin(loc(LocId::Edit), 'E') { + draw_menu_edit(ctx, state); + } + if ctx.menubar_menu_begin(loc(LocId::View), 'V') { + draw_menu_view(ctx, state); + } + if ctx.menubar_menu_begin(loc(LocId::Help), 'H') { + draw_menu_help(ctx, state); + } + } + ctx.menubar_end(); +} + +fn draw_menu_file(ctx: &mut Context, state: &mut State) { + if ctx.menubar_menu_item(loc(LocId::FileSave), 'S', kbmod::CTRL | vk::S) { + state.wants_save = StateSave::Save; + } + if ctx.menubar_menu_item(loc(LocId::FileSaveAs), 'A', vk::NULL) { + state.wants_save = StateSave::SaveAs; + } + if ctx.menubar_menu_item(loc(LocId::FileExit), 'X', kbmod::CTRL | vk::Q) { + state.wants_exit = true; + } + ctx.menubar_menu_end(); +} + +fn draw_menu_edit(ctx: &mut Context, state: &mut State) { + if ctx.menubar_menu_item(loc(LocId::EditUndo), 'U', kbmod::CTRL | vk::Z) { + state.buffer.undo(); + } + if ctx.menubar_menu_item(loc(LocId::EditRedo), 'R', kbmod::CTRL | vk::Y) { + state.buffer.redo(); + } + if ctx.menubar_menu_item(loc(LocId::EditCut), 'T', kbmod::CTRL | vk::X) { + ctx.set_clipboard(state.buffer.extract_selection(true)); + } + if ctx.menubar_menu_item(loc(LocId::EditCopy), 'C', kbmod::CTRL | vk::C) { + ctx.set_clipboard(state.buffer.extract_selection(false)); + } + if ctx.menubar_menu_item(loc(LocId::EditPaste), 'P', kbmod::CTRL | vk::V) { + state.buffer.write(ctx.get_clipboard()); + 
} + if ctx.menubar_menu_item(loc(LocId::EditFind), 'F', kbmod::CTRL | vk::F) { + state.wants_search = StateSearch::Focus; + } + ctx.menubar_menu_end(); +} + +fn draw_menu_view(ctx: &mut Context, state: &mut State) { + if ctx.menubar_menu_item(loc(LocId::ViewWordWrap), 'W', kbmod::ALT | vk::Z) { + state.buffer.toggle_word_wrap(); + } + ctx.menubar_menu_end(); +} + +fn draw_menu_help(ctx: &mut Context, state: &mut State) { + if ctx.menubar_menu_item(loc(LocId::HelpAbout), 'A', vk::NULL) { + state.wants_about = true; + } + ctx.menubar_menu_end(); +} + +fn draw_search(ctx: &mut Context, state: &mut State) { + ctx.block_begin("search"); + ctx.attr_background_rgba(ctx.indexed(IndexedColor::White)); + { + if ctx.contains_focus() && ctx.consume_shortcut(vk::ESCAPE) { + state.wants_search = StateSearch::Hidden; + } + + ctx.table_begin("needle"); + ctx.table_set_cell_gap(Size { + width: 1, + height: 0, + }); + { + ctx.table_next_row(); + + ctx.label("label", Overflow::Clip, loc(LocId::SearchLabel)); + + ctx.editline("input", &mut state.search_needle); + ctx.attr_intrinsic_size(Size { + width: COORD_TYPE_SAFE_MAX, + height: 1, + }); + if state.wants_search == StateSearch::Focus { + state.wants_search = StateSearch::Shown; + ctx.steal_focus(); + } + if ctx.is_focused() && ctx.consume_shortcut(vk::RETURN) { + if let Err(err) = state + .buffer + .find_and_select(&state.search_needle, state.search_options) + { + error_log_add(state, err); + } + } + } + ctx.table_end(); + + ctx.table_begin("options"); + ctx.table_set_cell_gap(Size { + width: 2, + height: 0, + }); + { + ctx.table_next_row(); + + if ctx.button("close", Overflow::Clip, "Close") { + state.wants_search = StateSearch::Hidden; + } + + ctx.checkbox( + "match-case", + Overflow::Clip, + loc(LocId::SearchMatchCase), + &mut state.search_options.match_case, + ); + ctx.checkbox( + "whole-word", + Overflow::Clip, + loc(LocId::SearchWholeWord), + &mut state.search_options.whole_word, + ); + ctx.checkbox( + "use-regex", + 
Overflow::Clip, + loc(LocId::SearchUseRegex), + &mut state.search_options.use_regex, + ); + } + ctx.table_end(); + } + ctx.block_end(); +} + +fn draw_editor(ctx: &mut Context, state: &mut State) { + let size = ctx.size(); + // TODO: The layout code should be able to just figure out the height on its own. + let mut height_reduction = 2; + if state.wants_search != StateSearch::Hidden { + height_reduction += 2; + } + ctx.textarea("textarea", state.buffer.clone()); + ctx.inherit_focus(); + ctx.attr_intrinsic_size(Size { + width: 0, + height: size.height - height_reduction, + }); +} + +fn draw_statusbar(ctx: &mut Context, state: &mut State) { + ctx.table_begin("statusbar"); + ctx.attr_background_rgba(ctx.indexed(IndexedColor::BrightBlue)); + ctx.table_set_cell_gap(Size { + width: 2, + height: 0, + }); + ctx.attr_padding(Rect::two(0, 1)); + { + ctx.table_next_row(); + + if ctx.button( + "newline", + Overflow::Clip, + if state.buffer.is_crlf() { "CRLF" } else { "LF" }, + ) { + let is_crlf = state.buffer.is_crlf(); + state.buffer.normalize_newlines(!is_crlf); + ctx.toss_focus_up(); + } + + ctx.button("encoding", Overflow::Clip, state.buffer.encoding()); + if ctx.contains_focus() { + if state.path.is_some() { + ctx.block_begin("encoding-picker"); + ctx.attr_float(FloatSpec { + anchor: Anchor::Last, + gravity_x: 0.0, + gravity_y: 1.0, + offset_x: 0, + offset_y: 0, + }); + ctx.attr_background_rgba(ctx.indexed(IndexedColor::White)); + ctx.attr_border(); + ctx.attr_padding(Rect::two(0, 1)); + { + if ctx.button("reopen", Overflow::Clip, loc(LocId::EncodingReopen)) { + state.wants_encoding_change = StateEncodingChange::Reopen; + } + ctx.focus_on_first_present(); + + if ctx.button("convert", Overflow::Clip, loc(LocId::EncodingConvert)) { + state.wants_encoding_change = StateEncodingChange::Convert; + } + } + ctx.block_end(); + } else { + // Can't reopen a file that doesn't exist. 
+ state.wants_encoding_change = StateEncodingChange::Convert; + } + } + + ctx.button( + "indentation", + Overflow::Clip, + &format!( + "{}:{}", + loc(if state.buffer.indent_with_tabs() { + LocId::IndentationTabs + } else { + LocId::IndentationSpaces + }), + state.buffer.tab_size(), + ), + ); + if ctx.contains_focus() { + ctx.table_begin("indentation-picker"); + ctx.attr_float(FloatSpec { + anchor: Anchor::Last, + gravity_x: 0.0, + gravity_y: 1.0, + offset_x: 0, + offset_y: 0, + }); + ctx.attr_background_rgba(ctx.indexed(IndexedColor::White)); + ctx.attr_border(); + ctx.attr_padding(Rect::two(0, 1)); + ctx.table_set_cell_gap(Size { + width: 1, + height: 0, + }); + { + ctx.table_next_row(); + + ctx.block_begin("indentation-type"); + { + if ctx.button("tabs", Overflow::Clip, loc(LocId::IndentationTabs)) { + state.buffer.set_indent_with_tabs(true); + } + if state.buffer.indent_with_tabs() { + ctx.attr_background_rgba(ctx.indexed(IndexedColor::Blue)); + } + ctx.attr_padding(Rect::two(0, 2)); + ctx.focus_on_first_present(); + + if ctx.button("spaces", Overflow::Clip, loc(LocId::IndentationSpaces)) { + state.buffer.set_indent_with_tabs(false); + } + if !state.buffer.indent_with_tabs() { + ctx.attr_background_rgba(ctx.indexed(IndexedColor::Blue)); + } + ctx.attr_padding(Rect::two(0, 2)); + } + ctx.block_end(); + + ctx.block_begin("indentation-width"); + { + for width in 1u8..=8 { + let ch = [b'0' + width]; + let label = unsafe { std::str::from_utf8_unchecked(&ch) }; + + ctx.next_block_id_mixin(width as u64); + if ctx.button("width", Overflow::Clip, label) { + state.buffer.set_tab_size(width as i32); + } + if state.buffer.tab_size() == width as i32 { + ctx.attr_background_rgba(ctx.indexed(IndexedColor::Blue)); + } + ctx.attr_padding(Rect::two(0, 2)); + } + } + ctx.block_end(); + } + ctx.table_end(); + } + + ctx.label( + "location", + Overflow::Clip, + &format!( + "{}:{}", + state.buffer.get_cursor_logical_pos().y + 1, + state.buffer.get_cursor_logical_pos().x + 1 + ), + ); 
+ + #[cfg(any(feature = "debug-layout", feature = "debug-latency"))] + ctx.label( + "stats", + Overflow::Clip, + &format!( + "{}/{}", + state.buffer.get_logical_line_count(), + state.buffer.get_visual_line_count(), + ), + ); + + if state.buffer.is_overtype() && ctx.button("overtype", Overflow::Clip, "OVR") { + state.buffer.set_overtype(false); + } + } + ctx.table_end(); +} + +fn draw_dialog_saveas(ctx: &mut Context, state: &mut State) { + if state.wants_save == StateSave::Save && state.path.is_none() { + state.wants_save = StateSave::SaveAs; + } + + let width = (ctx.size().width - 20).max(10); + let height = (ctx.size().height - 10).max(10); + + ctx.modal_begin("saveas", loc(LocId::SaveAsDialogTitle)); + ctx.attr_intrinsic_size(Size { width, height }); + { + ctx.label("path", Overflow::TruncateMiddle, state.save_dir.as_str()); + + { + if state.save_entries.is_none() { + draw_refresh_save_entries(state); + } + + let files = state.save_entries.as_ref().unwrap(); + let mut change_dir = None; + + ctx.scrollarea_begin( + "directory", + Size { + width: 0, + // -1 for the label (top) + // -1 for the label (bottom) + // -1 for the editline (bottom) + height: height - 3, + }, + ); + ctx.next_block_id_mixin(state.save_dir.as_str().len() as u64); + ctx.list_begin("files"); + for (i, entry) in files.iter().enumerate() { + if ctx.list_item(Overflow::TruncateMiddle, entry.as_str()) { + let str = entry.as_str(); + if str.ends_with('/') || str == ".." { + change_dir = Some(entry); + } else if state.save_filename != str { + state.save_filename = str.to_string(); + } else { + // Treat clicking twice on an item as confirmation to save it. + // TODO: This feels a bit weird if the user clicks on a `save_filename`-named item, + // because it skips the double-click confirmation. 
+ state.wants_save = StateSave::Save; + } + } + if i == 0 { + ctx.focus_on_first_present(); + } + } + ctx.list_end(); + ctx.scrollarea_end(); + + if let Some(entry) = change_dir { + let mut path = mem::take(&mut state.save_dir).take(); + + if entry.as_str() == ".." { + path.pop(); + } else { + // `entry` is a directory name with trailing "/", + // but we don't want the "/" in the path (it would look ugly). + let entry_str = entry.as_str(); + path.push(&entry_str[..entry_str.len() - 1]); + } + + state.save_dir = DisplayablePathBuf::new(path); + state.save_entries = None; + ctx.scrollarea_scroll_to(Point { x: 0, y: 0 }); + } + } + + ctx.label( + "filename-label", + Overflow::Clip, + &loc(LocId::SaveAsDialogFilenameLabel), + ); + + ctx.editline("filename", &mut state.save_filename); + ctx.focus_on_first_present(); + ctx.inherit_focus(); + if ctx.is_focused() && ctx.consume_shortcut(vk::RETURN) { + state.wants_save = StateSave::Save; + } + + if state.wants_save == StateSave::Save && !state.save_filename.is_empty() { + let path = Some(state.save_dir.as_path().join(&state.save_filename)); + // Only update the path if the save was successful. + if draw_handle_save(state, path.as_ref()) { + state.path = path; + } + } + } + if ctx.modal_end() { + state.wants_save = StateSave::None; + } +} + +fn draw_refresh_save_entries(state: &mut State) { + let dir = state.save_dir.as_path(); + let mut files = Vec::new(); + + if dir.parent().is_some() { + files.push(DisplayablePathBuf::from("..")); + } + + if let Ok(iter) = std::fs::read_dir(dir) { + for entry in iter.flatten() { + if let Ok(metadata) = entry.metadata() { + let mut name = entry.file_name(); + if metadata.is_dir() { + name.push("/"); + } + files.push(DisplayablePathBuf::from(name)); + } + } + } + + // Sort directories first, then by name, case-insensitive. 
+ files[1..].sort_by(|a, b| { + let a = a.as_bytes(); + let b = b.as_bytes(); + + let a_is_dir = a.last() == Some(&b'/'); + let b_is_dir = b.last() == Some(&b'/'); + + match b_is_dir.cmp(&a_is_dir) { + cmp::Ordering::Equal => icu::compare_strings(a, b), + other => other, + } + }); + + state.save_entries = Some(files); +} + +fn draw_handle_save(state: &mut State, path: Option<&PathBuf>) -> bool { + if let Some(path) = path.or(state.path.as_ref()) { + if let Err(err) = state.buffer.write_file(path) { + error_log_add(state, err); + return false; + } + } + state.wants_save = StateSave::None; + true +} + +fn draw_dialog_encoding_change(ctx: &mut Context, state: &mut State) { + let reopen = state.wants_encoding_change == StateEncodingChange::Reopen; + let width = (ctx.size().width - 20).max(10); + let height = (ctx.size().height - 10).max(10); + + ctx.modal_begin( + "encode", + if reopen { + loc(LocId::EncodingReopen) + } else { + loc(LocId::EncodingConvert) + }, + ); + { + ctx.scrollarea_begin("scrollarea", Size { width, height }); + { + let encodings = icu::get_available_encodings(); + + ctx.list_begin("encodings"); + for encoding in encodings { + if ctx.list_item(Overflow::Clip, encoding.as_str()) { + state.wants_encoding_change = StateEncodingChange::None; + if reopen && state.path.is_some() { + if state.buffer.is_dirty() { + if let Some(path) = &state.path { + if let Err(err) = state.buffer.write_file(path) { + error_log_add(state, err); + } + } + } + + if let Some(path) = &mut state.path { + if let Err(err) = file_open(path).and_then(|mut file| { + state.buffer.read_file(&mut file, Some(encoding.as_str())) + }) { + error_log_add(state, err); + } + } + } else { + state.buffer.set_encoding(encoding.as_str()); + } + } + } + ctx.list_end(); + } + ctx.scrollarea_end(); + } + if ctx.modal_end() { + state.wants_encoding_change = StateEncodingChange::None; + } +} + +fn draw_handle_wants_exit(ctx: &mut Context, state: &mut State) { + if !state.buffer.is_dirty() { + 
state.exit = true; + return; + } + + ctx.modal_begin("unsaved-changes", loc(LocId::UnsavedChangesDialogTitle)); + ctx.attr_background_rgba(ctx.indexed(IndexedColor::Red)); + { + ctx.label( + "description", + Overflow::Clip, + loc(LocId::UnsavedChangesDialogDescription), + ); + ctx.attr_padding(Rect::three(1, 2, 1)); + + ctx.table_begin("choices"); + ctx.attr_padding(Rect::three(0, 2, 1)); + ctx.attr_alignment(Alignment::Center); + ctx.table_set_cell_gap(Size { + width: 2, + height: 0, + }); + { + ctx.table_next_row(); + + if ctx.button("yes", Overflow::Clip, loc(LocId::UnsavedChangesDialogYes)) { + state.wants_save = StateSave::Save; + } + if ctx.button("no", Overflow::Clip, loc(LocId::UnsavedChangesDialogNo)) { + state.exit = true; + } + if ctx.button( + "cancel", + Overflow::Clip, + loc(LocId::UnsavedChangesDialogCancel), + ) { + state.wants_exit = false; + } + ctx.focus_on_first_present(); + } + ctx.table_end(); + } + + if ctx.modal_end() { + state.wants_exit = false; + } +} + +fn draw_dialog_about(ctx: &mut Context, state: &mut State) { + ctx.modal_begin("about", loc(LocId::AboutDialogTitle)); + { + ctx.block_begin("content"); + ctx.attr_padding(Rect::three(1, 2, 1)); + { + ctx.label( + "description", + Overflow::TruncateTail, + loc(LocId::AboutDialogDescription), + ); + ctx.attr_alignment(Alignment::Center); + ctx.label( + "version", + Overflow::TruncateHead, + &format!( + "{}{}", + loc(LocId::AboutDialogVersion), + env!("CARGO_PKG_VERSION") + ), + ); + ctx.attr_alignment(Alignment::Center); + ctx.label( + "copyright", + Overflow::TruncateTail, + "Copyright (c) Microsoft Corp 2025", + ); + ctx.attr_alignment(Alignment::Center); + } + ctx.block_end(); + } + if ctx.modal_end() { + state.wants_about = false; + } +} + +fn draw_error_log(ctx: &mut Context, state: &mut State) { + ctx.modal_begin("errors", "Error"); + ctx.attr_background_rgba(ctx.indexed(IndexedColor::Red)); + { + let off = state.error_log_index + state.error_log.len() - state.error_log_count; + for 
i in 0..state.error_log_count { + let idx = (off + i) % state.error_log.len(); + let msg = &state.error_log[idx][..]; + if !msg.is_empty() { + ctx.label("error", Overflow::TruncateTail, msg); + ctx.attr_padding(Rect::three(if i == 0 { 1 } else { 0 }, 2, 1)); + } + } + + if ctx.button("ok", Overflow::Clip, "Ok") { + state.error_log_count = 0; + } + ctx.attr_padding(Rect::three(1, 2, 1)); + ctx.focus_on_first_present(); + } + if ctx.modal_end() { + state.error_log_count = 0; + } +} + +fn error_log_add(state: &mut State, err: apperr::Error) { + let msg = err.message(); + if !msg.is_empty() { + state.error_log[state.error_log_index] = msg; + state.error_log_index = (state.error_log_index + 1) % state.error_log.len(); + state.error_log_count = cmp::min(state.error_log_count + 1, state.error_log.len()); + } +} + +fn file_open(path: &Path) -> apperr::Result { + File::open(path).map_err(apperr::Error::from) +} + +fn query_color_palette(tui: &mut Tui, vt_parser: &mut vt::Parser) { + let mut indexed_colors = framebuffer::DEFAULT_THEME; + + sys::write_stdout(concat!( + // OSC 4 color table requests for indices 0 through 15 (base colors). + "\x1b]4;0;?;1;?;2;?;3;?;4;?;5;?;6;?;7;?\x07", + "\x1b]4;8;?;9;?;10;?;11;?;12;?;13;?;14;?;15;?\x07", + // OSC 10 and 11 queries for the current foreground and background colors. + "\x1b]10;?\x07\x1b]11;?\x07", + // CSI c reports the terminal capabilities. + // It also helps us to detect the end of the responses, because not all + // terminals support the OSC queries, but all of them support CSI c. 
+ "\x1b[c", + )); + + let mut done = false; + let mut osc_buffer = String::new(); + + while !done { + let Some(input) = sys::read_stdin(vt_parser.read_timeout()) else { + break; + }; + + let mut vt_stream = vt_parser.parse(&input); + while let Some(token) = vt_stream.next() { + match token { + Token::Csi(state) if state.final_byte == 'c' => done = true, + Token::Osc { mut data, partial } => { + if partial { + osc_buffer.push_str(data); + continue; + } + if !osc_buffer.is_empty() { + osc_buffer.push_str(data); + data = &osc_buffer; + } + + let mut splits = data.split_terminator(';'); + + let color = match splits.next().unwrap_or("") { + // The response is `4;;rgb://`. + "4" => match splits.next().unwrap_or("").parse::() { + Ok(val) if val < 16 => &mut indexed_colors[val], + _ => continue, + }, + // The response is `10;rgb://`. + "10" => &mut indexed_colors[IndexedColor::DefaultForeground as usize], + // The response is `11;rgb://`. + "11" => &mut indexed_colors[IndexedColor::DefaultBackground as usize], + _ => continue, + }; + + let color_param = splits.next().unwrap_or(""); + if !color_param.starts_with("rgb:") { + continue; + } + + let mut iter = color_param[4..].split_terminator('/'); + let rgb_parts = [(); 3].map(|_| iter.next().unwrap_or("0")); + let mut rgb = 0; + + for part in rgb_parts { + if part.len() == 2 || part.len() == 4 { + let Ok(mut val) = usize::from_str_radix(part, 16) else { + continue; + }; + if part.len() == 4 { + val = (val * 0xff + 0x80) / 0xffff; + } + rgb = (rgb >> 8) | ((val as u32) << 16); + } + } + + *color = rgb | 0xff000000; + osc_buffer.clear(); + } + _ => {} + } + } + } + + tui.setup_indexed_colors(indexed_colors); +} diff --git a/src/memchr.rs b/src/memchr.rs new file mode 100644 index 0000000..d99649f --- /dev/null +++ b/src/memchr.rs @@ -0,0 +1,491 @@ +//! Rust has a very popular `memchr` crate. It's quite fast, so you may ask yourself +//! why we don't just use it: Simply put, this is optimized for short inputs. 
/// Scalar implementation of the forward two-needle search.
///
/// Walks from `beg` towards `end` and returns a pointer to the first byte
/// equal to `needle1` or `needle2`. Returns `end` if no such byte exists.
///
/// # Safety
///
/// `beg..end` must be a readable byte range with `beg <= end`.
unsafe fn memchr2_fallback(
    needle1: u8,
    needle2: u8,
    beg: *const u8,
    end: *const u8,
) -> *const u8 {
    let mut cursor = beg;
    loop {
        if cursor == end {
            return end;
        }
        // The caller guarantees that everything before `end` is readable.
        let byte = unsafe { *cursor };
        if byte == needle1 || byte == needle2 {
            return cursor;
        }
        cursor = unsafe { cursor.add(1) };
    }
}
+ let off = if ((beg as usize) & 2048) != 0 { + 32 + } else { + remaining + }; + + let v = _mm256_loadu_si256(end.sub(off) as *const _); + let a = _mm256_cmpeq_epi8(v, n1); + let b = _mm256_cmpeq_epi8(v, n2); + let c = _mm256_or_si256(a, b); + let m = _mm256_movemask_epi8(c) as u32; + + // If we were in the upper half of the 4KiB page, we must shift the mask such that it's not aligned with + // the end of the haystack but rather with the current `beg`: A shift of `32 - remaining` is needed, + // which equals `off - remaining`. Otherwise, we must not shift at all. Luckily `off` will be `remaining` + // in that case and `remaining - remaining` is 0. + let m = m >> (off - remaining); + + // If we were in the lower half of the 4KiB page, we must mask out anything beyond the end of + // the haystack. Here, we basically restrict the "length" if `m` to contain `remaining`-many bits. + // In case of a read in the upper half this won't do anything, but that's fine. Branchless code is great. + let m = m & ((1 << remaining) - 1); + + if m != 0 { + return beg.add(m.trailing_zeros() as usize); + } + } + + end + */ + } +} + +#[cfg(target_arch = "aarch64")] +unsafe fn memchr2_neon(needle1: u8, needle2: u8, mut beg: *const u8, end: *const u8) -> *const u8 { + unsafe { + use std::arch::aarch64::*; + + if distance(end, beg) >= 16 { + let n1 = vdupq_n_u8(needle1); + let n2 = vdupq_n_u8(needle2); + + loop { + let v = vld1q_u8(beg as *const _); + let a = vceqq_u8(v, n1); + let b = vceqq_u8(v, n2); + let c = vorrq_u8(a, b); + + // https://community.arm.com/arm-community-blogs/b/servers-and-cloud-computing-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon + let m = vreinterpretq_u16_u8(c); + let m = vshrn_n_u16(m, 4); + let m = vreinterpret_u64_u8(m); + let m = vget_lane_u64(m, 0); + + if m != 0 { + return beg.add(m.trailing_zeros() as usize >> 2); + } + + beg = beg.add(16); + if distance(end, beg) < 16 { + break; + } + } + } + + memchr2_fallback(needle1, needle2, beg, end) 
+ } +} + +/// Same as `memchr2`, but searches from the end of the haystack. +/// If no needle is found, 0 is returned. +/// +/// *NOTE: Unlike `memchr2` (or `memrchr`), an offset PAST the hit is returned.* +/// This is because this function is primarily used for `ucd::newlines_backward`, +/// which needs exactly that. +pub fn memrchr2(needle1: u8, needle2: u8, haystack: &[u8], offset: usize) -> Option { + unsafe { + let beg = haystack.as_ptr(); + let it = beg.add(offset.min(haystack.len())); + let it = memrchr2_raw(needle1, needle2, beg, it); + if it.is_null() { + None + } else { + Some(distance(it, beg)) + } + } +} + +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +static mut MEMRCHR2_DISPATCH: unsafe fn( + needle1: u8, + needle2: u8, + beg: *const u8, + end: *const u8, +) -> *const u8 = memrchr2_dispatch; + +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +unsafe fn memrchr2_dispatch(needle1: u8, needle2: u8, beg: *const u8, end: *const u8) -> *const u8 { + let func = if is_x86_feature_detected!("avx2") { + memrchr2_avx2 + } else { + memrchr2_fallback + }; + unsafe { MEMRCHR2_DISPATCH = func }; + unsafe { func(needle1, needle2, beg, end) } +} + +unsafe fn memrchr2_raw(needle1: u8, needle2: u8, beg: *const u8, end: *const u8) -> *const u8 { + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + return unsafe { MEMRCHR2_DISPATCH(needle1, needle2, beg, end) }; + + #[cfg(target_arch = "aarch64")] + return unsafe { memrchr2_neon(needle1, needle2, beg, end) }; + + #[allow(unreachable_code)] + return unsafe { memrchr2_fallback(needle1, needle2, beg, end) }; +} + +unsafe fn memrchr2_fallback( + needle1: u8, + needle2: u8, + beg: *const u8, + mut end: *const u8, +) -> *const u8 { + unsafe { + while end != beg { + end = end.sub(1); + let ch = *end; + if ch == needle1 || needle2 == ch { + return end; + } + } + null() + } +} + +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +#[target_feature(enable = "avx2")] +unsafe fn 
/// NEON implementation of the backward two-needle search.
///
/// Consumes the range from the back in 16-byte steps and hands whatever is
/// left at the front (fewer than 16 bytes) to `memrchr2_fallback`.
/// Returns a pointer to the last matching byte, or whatever the fallback
/// returns for the remaining prefix.
#[cfg(target_arch = "aarch64")]
unsafe fn memrchr2_neon(needle1: u8, needle2: u8, beg: *const u8, mut end: *const u8) -> *const u8 {
    unsafe {
        use std::arch::aarch64::*;

        let pattern1 = vdupq_n_u8(needle1);
        let pattern2 = vdupq_n_u8(needle2);

        while distance(end, beg) >= 16 {
            end = end.sub(16);

            let chunk = vld1q_u8(end as *const _);
            let hits = vorrq_u8(vceqq_u8(chunk, pattern1), vceqq_u8(chunk, pattern2));

            // Narrow the 16 comparison results into a 64-bit mask with 4 bits per byte.
            // https://community.arm.com/arm-community-blogs/b/servers-and-cloud-computing-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
            let narrowed = vshrn_n_u16(vreinterpretq_u16_u8(hits), 4);
            let mask = vget_lane_u64(vreinterpret_u64_u8(narrowed), 0);

            if mask != 0 {
                // 4 mask bits per byte: dividing the leading zero count by 4 gives
                // the number of non-matching bytes after the last hit.
                let trailing_misses = mask.leading_zeros() as usize >> 2;
                return end.add(15 - trailing_misses);
            }
        }

        memrchr2_fallback(needle1, needle2, beg, end)
    }
}
Memchr2<'_> { + type Item = usize; + + fn next(&mut self) -> Option { + if self.it.is_null() { + return None; + } + + self.it = unsafe { memchr2_raw(self.needle1, self.needle2, self.it, self.end) }; + if self.it.is_null() { + return None; + } + + let idx = unsafe { distance(self.it, self.beg) }; + self.it = if self.it == self.end { + null() + } else { + unsafe { self.it.add(1) } + }; + Some(idx) + } +} + +impl FusedIterator for Memchr2<'_> {} + +pub struct memrchr2<'a> { + needle1: u8, + needle2: u8, + beg: *const u8, + it: *const u8, + _marker: PhantomData<&'a [u8]>, +} + +impl<'a> memrchr2<'a> { + pub fn new(needle1: u8, needle2: u8, haystack: &'a [u8]) -> Self { + Self { + needle1, + needle2, + beg: haystack.as_ptr(), + it: unsafe { haystack.as_ptr().add(haystack.len()) }, + _marker: PhantomData, + } + } +} + +impl Iterator for memrchr2<'_> { + type Item = usize; + + fn next(&mut self) -> Option { + if self.it.is_null() { + return None; + } + + self.it = unsafe { memrchr2_raw(self.needle1, self.needle2, self.beg, self.it) }; + if self.it.is_null() { + return None; + } + + let idx = unsafe { distance(self.it, self.beg) }; + self.it = if self.it == self.beg { + null() + } else { + unsafe { self.it.sub(1) } + }; + Some(idx) + } +} + +impl FusedIterator for memrchr2<'_> {}*/ + +// Can be replaced with `sub_ptr` once it's stabilized. 
/// Returns the number of `T` elements between `lo` and `hi`.
///
/// # Safety
///
/// Both pointers must satisfy the requirements of `pointer::offset_from`
/// (in particular, they must be derived from the same allocation), and `hi`
/// must not be lower than `lo`: a negative distance is undefined behavior
/// because the sign check is skipped via `unwrap_unchecked`.
// Can be replaced with `sub_ptr` (since renamed `offset_from_unsigned`) once
// the toolchain in use provides it as a stable API.
#[inline(always)]
unsafe fn distance<T>(hi: *const T, lo: *const T) -> usize {
    // Catch violations of the `hi >= lo` precondition in debug builds.
    debug_assert!(hi >= lo);
    unsafe { usize::try_from(hi.offset_from(lo)).unwrap_unchecked() }
}
/// Returns the user's preferred locales as found in the environment.
///
/// The variables are consulted in the usual precedence order: `LANGUAGE`,
/// `LC_ALL`, `LC_MESSAGES`, `LANG`. Each value is split at `:` and empty
/// entries are dropped. Variables that are unset or not valid Unicode are
/// skipped. The entries are returned verbatim (e.g. `en_US.UTF-8`).
pub fn preferred_languages() -> Vec<String> {
    let mut locales = Vec::new();

    for key in ["LANGUAGE", "LC_ALL", "LC_MESSAGES", "LANG"] {
        if let Ok(val) = std::env::var(key) {
            locales.extend(
                val.split(':')
                    .filter(|entry| !entry.is_empty())
                    .map(String::from),
            );
        }
    }

    locales
}
+ if libc::isatty(STATE.stdin) == 0 { + STATE.stdin = check_int_return(libc::open(c"/dev/tty".as_ptr(), libc::O_RDONLY))?; + } + if libc::isatty(STATE.stdout) == 0 { + STATE.stdout = check_int_return(libc::open(c"/dev/tty".as_ptr(), libc::O_WRONLY))?; + } + + check_int_return(libc::tcgetattr( + STATE.stdout, + &raw mut STATE.stdout_initial_termios, + ))?; + + let mut termios = STATE.stdout_initial_termios; + termios.c_lflag &= !(libc::ICANON | libc::ECHO); + check_int_return(libc::tcsetattr(STATE.stdout, libc::TCSANOW, &termios))?; + + // Set STATE.inject_resize to true whenever we get a SIGWINCH. + let mut sigwinch_action: libc::sigaction = mem::zeroed(); + sigwinch_action.sa_sigaction = sigwinch_handler as libc::sighandler_t; + check_int_return(libc::sigaction( + libc::SIGWINCH, + &sigwinch_action, + null_mut(), + ))?; + + Ok(()) + } +} + +pub fn deinit() { + unsafe { + libc::tcsetattr( + STATE.stdout, + libc::TCSANOW, + &raw mut STATE.stdout_initial_termios, + ); + } +} + +pub fn inject_window_size_into_stdin() { + unsafe { + STATE.inject_resize = true; + } +} + +fn get_window_size() -> (u16, u16) { + let mut w = 0; + let mut h = 0; + + for attempt in 1.. { + let winsz = unsafe { + let mut winsz: libc::winsize = mem::zeroed(); + libc::ioctl(STATE.stdout, libc::TIOCGWINSZ, &raw mut winsz); + winsz + }; + + w = winsz.ws_col; + h = winsz.ws_row; + if w != 0 && h != 0 { + break; + } + + if attempt == 10 { + w = 80; + h = 24; + break; + } + + // Some terminals are bad emulators and don't report TIOCGWINSZ immediately. 
+ thread::sleep(time::Duration::from_millis(10 * attempt)); + } + + (w, h) +} + +struct State { + stdin: libc::c_int, + stdout: libc::c_int, + stdout_initial_termios: libc::termios, + inject_resize: bool, + // Buffer for incomplete UTF-8 sequences (max 4 bytes needed) + utf8_buf: [u8; 4], + utf8_len: usize, +} + +static mut STATE: State = State { + stdin: libc::STDIN_FILENO, + stdout: libc::STDOUT_FILENO, + stdout_initial_termios: unsafe { mem::zeroed() }, + inject_resize: false, + utf8_buf: [0; 4], + utf8_len: 0, +}; + +/// Reads from stdin. +/// +/// Returns `None` if there was an error reading from stdin. +/// Returns `Some("")` if the given timeout was reached. +/// Otherwise, it returns the read, non-empty string. +pub fn read_stdin(timeout: Option) -> Option { + unsafe { + if let Some(timeout) = timeout { + let mut pollfd = libc::pollfd { + fd: STATE.stdin, + events: libc::POLLIN, + revents: 0, + }; + let ts = libc::timespec { + tv_sec: timeout.as_secs() as libc::time_t, + tv_nsec: timeout.subsec_nanos() as libc::c_long, + }; + let ret = libc::ppoll(&mut pollfd, 1, &ts, null()); + if ret < 0 { + return None; + } + if ret == 0 { + return Some(String::new()); + } + } + + #[allow(invalid_value)] + let mut buf: [u8; 1024] = MaybeUninit::uninit().assume_init(); + let mut read = 0; + + if STATE.utf8_len != 0 { + read = STATE.utf8_len; + input[..read].copy_from_slice(&STATE.utf8_buf[..read]); + } + + loop { + if STATE.inject_resize { + STATE.inject_resize = false; + let (w, h) = get_window_size(); + return Some(format!("\x1b[8;{};{}t", h, w)); + } + + // Read new data + let n = loop { + let ret = libc::read(STATE.stdin, buf.as_mut_ptr() as *mut _, buf.len()); + if ret > 0 { + break ret as usize; + } + if ret == 0 { + return None; + } + if *libc::__errno_location() != libc::EINTR { + return None; + } + }; + + // Prepend any cached incomplete UTF-8 sequence + let input = if STATE.utf8_len > 0 { + let total = STATE.utf8_len + n; + let mut combined = 
Vec::with_capacity(total); + combined.extend_from_slice(&STATE.utf8_buf[..STATE.utf8_len]); + combined.extend_from_slice(&buf[..n]); + STATE.utf8_len = 0; + combined + } else { + buf[..n].to_vec() + }; + + // Find last complete UTF-8 sequence + let mut valid_end = input.len(); + while valid_end > 0 && (input[valid_end - 1] & 0xC0) == 0x80 { + valid_end -= 1; + if input.len() - valid_end >= 4 || valid_end == 0 { + // Either too many trail bytes or all trail bytes - invalid UTF-8 + valid_end = input.len(); + break; + } + } + + // Cache incomplete sequence if any + if valid_end < input.len() { + let remaining = input.len() - valid_end; + STATE.utf8_buf[..remaining].copy_from_slice(&input[valid_end..]); + STATE.utf8_len = remaining; + } + + // Convert valid portion to string + if let Ok(s) = String::from_utf8(input[..valid_end].to_vec()) { + if !s.is_empty() { + return Some(s); + } + } + } + } +} + +pub fn write_stdout(text: &str) { + let buf = text.as_bytes(); + let mut written = 0; + + while written < buf.len() { + let w = &buf[written..]; + let n = unsafe { libc::write(STATE.stdout, w.as_ptr() as *const _, w.len()) }; + + if n >= 0 { + written += n as usize; + continue; + } + + let err = unsafe { *libc::__errno_location() }; + if err != libc::EINTR { + return; + } + } +} + +pub fn open_stdin_if_redirected() -> Option { + unsafe { + if libc::isatty(libc::STDIN_FILENO) == 0 { + Some(File::from_raw_fd(libc::STDIN_FILENO)) + } else { + None + } + } +} + +pub unsafe fn virtual_reserve(size: usize) -> apperr::Result<*mut u8> { + unsafe { + let ptr = libc::mmap( + null_mut(), + size, + libc::PROT_NONE, + libc::MAP_PRIVATE | libc::MAP_ANONYMOUS, + -1, + 0, + ); + if ptr == libc::MAP_FAILED { + Err(apperr::Error::new(libc::ENOMEM as u32)) + } else { + Ok(ptr as *mut u8) + } + } +} + +pub unsafe fn virtual_release(base: *mut u8, size: usize) { + unsafe { + libc::munmap(base as *mut libc::c_void, size); + } +} + +pub unsafe fn virtual_commit(base: *mut u8, size: usize) -> 
apperr::Result<()> { + unsafe { + let status = libc::mprotect( + base as *mut libc::c_void, + size, + libc::PROT_READ | libc::PROT_WRITE, + ); + if status != 0 { + Err(apperr::Error::new(libc::ENOMEM as u32)) + } else { + Ok(()) + } + } +} + +unsafe fn load_library(name: &CStr) -> apperr::Result<*mut c_void> { + unsafe { + let handle = libc::dlopen(name.as_ptr(), libc::RTLD_LAZY); + if handle.is_null() { + Err(apperr::Error::new(libc::ELIBACC as u32)) + } else { + Ok(handle) + } + } +} + +// It'd be nice to constrain T to std::marker::FnPtr, but that's unstable. +pub unsafe fn get_proc_address(handle: *mut c_void, name: &CStr) -> apperr::Result { + unsafe { + let sym = libc::dlsym(handle, name.as_ptr()); + if sym.is_null() { + Err(apperr::Error::new(libc::ELIBACC as u32)) + } else { + Ok(mem::transmute_copy(&sym)) + } + } +} + +pub unsafe fn load_icu() -> apperr::Result<*mut c_void> { + unsafe { load_library(c"icu.dll") } +} + +#[inline] +pub fn io_error_to_apperr(err: std::io::Error) -> apperr::Error { + unsafe { apperr::Error::new(err.raw_os_error().unwrap_or(0) as u32) } +} + +pub fn format_error(err: apperr::Error) -> String { + let errno = err.value() & 0xFFFF; + let mut result = format!("Error {}", errno); + + unsafe { + let ptr = libc::strerror(errno as i32); + if !ptr.is_null() { + let msg = CStr::from_ptr(ptr).to_string_lossy(); + result.push_str(": "); + result.push_str(&msg); + } + } + + result +} + +fn errno_to_apperr(no: c_int) -> apperr::Error { + unsafe { apperr::Error::new(no.max(1) as u32) } +} + +fn check_int_return(ret: libc::c_int) -> apperr::Result { + if ret < 0 { + Err(errno_to_apperr(unsafe { *libc::__errno_location() })) + } else { + Ok(ret) + } +} diff --git a/src/sys/windows.rs b/src/sys/windows.rs new file mode 100644 index 0000000..d2cf821 --- /dev/null +++ b/src/sys/windows.rs @@ -0,0 +1,524 @@ +use crate::helpers::{CoordType, Size}; +use crate::{apperr, helpers}; +use std::ffi::CStr; +use std::fmt::Write as _; +use std::fs::File; +use 
std::mem::MaybeUninit; +use std::os::windows::io::FromRawHandle; +use std::ptr::{null, null_mut}; +use std::{mem, time}; +use windows_sys::Win32::Foundation; +use windows_sys::Win32::Globalization; +use windows_sys::Win32::Storage::FileSystem; +use windows_sys::Win32::System::Console; +use windows_sys::Win32::System::Diagnostics::Debug; +use windows_sys::Win32::System::IO; +use windows_sys::Win32::System::LibraryLoader; +use windows_sys::Win32::System::Memory; +use windows_sys::Win32::System::Threading; +use windows_sys::w; + +pub fn preferred_languages() -> Vec { + unsafe { + const LEN: usize = 256; + + let mut lang_num = 0; + let mut lang_buf = [const { MaybeUninit::::uninit() }; LEN]; + let mut lang_buf_len = lang_buf.len() as u32; + if Globalization::GetUserPreferredUILanguages( + Globalization::MUI_LANGUAGE_NAME, + &mut lang_num, + lang_buf[0].as_mut_ptr(), + &mut lang_buf_len, + ) == 0 + || lang_num == 0 + { + return Vec::new(); + } + + // Drop the terminating double-null character. 
+ lang_buf_len = lang_buf_len.saturating_sub(1); + + let mut lang_buf_utf8 = [const { MaybeUninit::::uninit() }; 3 * LEN]; + let lang_buf_utf8_len = Globalization::WideCharToMultiByte( + Globalization::CP_UTF8, + 0, + lang_buf[0].as_mut_ptr(), + lang_buf_len as i32, + lang_buf_utf8[0].as_mut_ptr(), + lang_buf_utf8.len() as i32, + null(), + null_mut(), + ); + if lang_buf_utf8_len == 0 { + return Vec::new(); + } + + let result = helpers::str_from_raw_parts_mut( + lang_buf_utf8[0].as_mut_ptr(), + lang_buf_utf8_len as usize, + ); + result.make_ascii_lowercase(); + result.split_terminator('\0').map(String::from).collect() + } +} + +type ReadConsoleInputExW = unsafe extern "system" fn( + h_console_input: Foundation::HANDLE, + lp_buffer: *mut Console::INPUT_RECORD, + n_length: u32, + lp_number_of_events_read: *mut u32, + w_flags: u16, +) -> Foundation::BOOL; + +const CONSOLE_READ_NOWAIT: u16 = 0x0002; + +struct State { + read_console_input_ex: ReadConsoleInputExW, + stdin: Foundation::HANDLE, + stdout: Foundation::HANDLE, + stdin_cp_old: u32, + stdout_cp_old: u32, + stdin_mode_old: u32, + stdout_mode_old: u32, + leading_surrogate: u16, + inject_resize: bool, + wants_exit: bool, +} + +static mut STATE: State = State { + read_console_input_ex: read_console_input_ex_placeholder, + stdin: null_mut(), + stdout: null_mut(), + stdin_cp_old: 0, + stdout_cp_old: 0, + stdin_mode_old: 0, + stdout_mode_old: 0, + leading_surrogate: 0, + inject_resize: false, + wants_exit: false, +}; + +unsafe extern "system" fn read_console_input_ex_placeholder( + _: Foundation::HANDLE, + _: *mut Console::INPUT_RECORD, + _: u32, + _: *mut u32, + _: u16, +) -> Foundation::BOOL { + panic!(); +} + +extern "system" fn console_ctrl_handler(_ctrl_type: u32) -> Foundation::BOOL { + unsafe { + STATE.wants_exit = true; + IO::CancelIoEx(STATE.stdin, null()); + } + 1 +} + +pub fn init() -> apperr::Result<()> { + unsafe { + let kernel32 = LibraryLoader::GetModuleHandleW(w!("kernel32.dll")); + 
STATE.read_console_input_ex = get_proc_address(kernel32, c"ReadConsoleInputExW")?; + + check_bool_return(Console::SetConsoleCtrlHandler( + Some(console_ctrl_handler), + 1, + ))?; + + STATE.stdin = FileSystem::CreateFileW( + w!("CONIN$"), + Foundation::GENERIC_READ | Foundation::GENERIC_WRITE, + FileSystem::FILE_SHARE_READ | FileSystem::FILE_SHARE_WRITE, + null_mut(), + FileSystem::OPEN_EXISTING, + 0, + null_mut(), + ); + STATE.stdout = FileSystem::CreateFileW( + w!("CONOUT$"), + Foundation::GENERIC_READ | Foundation::GENERIC_WRITE, + FileSystem::FILE_SHARE_READ | FileSystem::FILE_SHARE_WRITE, + null_mut(), + FileSystem::OPEN_EXISTING, + 0, + null_mut(), + ); + if STATE.stdin == Foundation::INVALID_HANDLE_VALUE + || STATE.stdout == Foundation::INVALID_HANDLE_VALUE + { + return Err(get_last_error()); + } + + STATE.stdin_cp_old = Console::GetConsoleCP(); + STATE.stdout_cp_old = Console::GetConsoleOutputCP(); + check_bool_return(Console::GetConsoleMode( + STATE.stdin, + &raw mut STATE.stdin_mode_old, + ))?; + check_bool_return(Console::GetConsoleMode( + STATE.stdout, + &raw mut STATE.stdout_mode_old, + ))?; + + check_bool_return(Console::SetConsoleCP(Globalization::CP_UTF8))?; + check_bool_return(Console::SetConsoleOutputCP(Globalization::CP_UTF8))?; + check_bool_return(Console::SetConsoleMode( + STATE.stdin, + Console::ENABLE_WINDOW_INPUT + | Console::ENABLE_EXTENDED_FLAGS + | Console::ENABLE_VIRTUAL_TERMINAL_INPUT, + ))?; + check_bool_return(Console::SetConsoleMode( + STATE.stdout, + Console::ENABLE_PROCESSED_OUTPUT + | Console::ENABLE_WRAP_AT_EOL_OUTPUT + | Console::ENABLE_VIRTUAL_TERMINAL_PROCESSING + | Console::DISABLE_NEWLINE_AUTO_RETURN, + ))?; + + Ok(()) + } +} + +pub fn deinit() { + unsafe { + Console::SetConsoleCP(STATE.stdin_cp_old); + Console::SetConsoleOutputCP(STATE.stdout_cp_old); + Console::SetConsoleMode(STATE.stdin, STATE.stdin_mode_old); + Console::SetConsoleMode(STATE.stdout, STATE.stdout_mode_old); + } +} + +pub fn inject_window_size_into_stdin() { 
+ unsafe { + STATE.inject_resize = true; + } +} + +fn get_console_size() -> Option { + unsafe { + let mut info: Console::CONSOLE_SCREEN_BUFFER_INFOEX = mem::zeroed(); + info.cbSize = mem::size_of::() as u32; + if Console::GetConsoleScreenBufferInfoEx(STATE.stdout, &mut info) == 0 { + return None; + } + + let w = (info.srWindow.Right - info.srWindow.Left + 1).max(1) as CoordType; + let h = (info.srWindow.Bottom - info.srWindow.Top + 1).max(1) as CoordType; + Some(Size { + width: w, + height: h, + }) + } +} + +/// Reads from stdin. +/// +/// Returns `None` if there was an error reading from stdin. +/// Returns `Some("")` if the given timeout was reached. +/// Otherwise, it returns the read, non-empty string. +pub fn read_stdin(timeout: Option) -> Option { + let mut input_buf = [const { MaybeUninit::::uninit() }; 1024]; + let mut input_buf_cap = input_buf.len(); + let mut utf16_buf = [const { MaybeUninit::::uninit() }; 1024]; + let mut utf16_buf_len = 0; + let mut resize_event = None; + let mut read_more = true; + let mut read_poll = timeout.is_some(); + + if unsafe { STATE.inject_resize } { + resize_event = get_console_size(); + read_poll = true; + unsafe { STATE.inject_resize = false }; + } + + if unsafe { STATE.leading_surrogate } != 0 { + utf16_buf[0] = MaybeUninit::new(unsafe { STATE.leading_surrogate }); + utf16_buf_len = 1; + input_buf_cap -= 1; + unsafe { STATE.leading_surrogate = 0 }; + } + + if let Some(timeout) = timeout { + let wait_result = + unsafe { Threading::WaitForSingleObject(STATE.stdin, timeout.as_millis() as u32) }; + match wait_result { + // Ready to read? Continue with reading below. + // `read_more` is already true to ensure we don't block. + Foundation::WAIT_OBJECT_0 => {} + // Timeout? Skip reading entirely. + Foundation::WAIT_TIMEOUT => read_more = false, + // Error? Tell the caller stdin is broken. + _ => return None, + } + } + + // This loops exists, just in case there's events in the input buffer that we aren't interested in. 
+ // It should be rare for this to loop. + while read_more { + let input = unsafe { + // If we had a `inject_resize`, we don't want to block indefinitely for other pending input on startup, + // but are still interested in any other pending input that may be waiting for us. + let flags = if read_poll { CONSOLE_READ_NOWAIT } else { 0 }; + let mut read = 0; + let ok = (STATE.read_console_input_ex)( + STATE.stdin, + input_buf[0].as_mut_ptr(), + input_buf_cap as u32, + &mut read, + flags, + ); + if ok == 0 || STATE.wants_exit { + return None; + } + &*(&input_buf[..read as usize] as *const _ as *const [Console::INPUT_RECORD]) + }; + + for inp in input { + match inp.EventType as u32 { + Console::KEY_EVENT => { + let event = unsafe { &inp.Event.KeyEvent }; + let ch = unsafe { event.uChar.UnicodeChar }; + if event.bKeyDown != 0 && ch != 0 { + utf16_buf[utf16_buf_len] = MaybeUninit::new(ch); + utf16_buf_len += 1; + } + } + Console::WINDOW_BUFFER_SIZE_EVENT => { + let event = unsafe { &inp.Event.WindowBufferSizeEvent }; + let w = event.dwSize.X as CoordType; + let h = event.dwSize.Y as CoordType; + // Windows is prone to sending broken/useless `WINDOW_BUFFER_SIZE_EVENT`s. + // E.g. starting conhost will emit 3 in a row. Skip rendering in that case. + if w > 0 && h > 0 { + resize_event = Some(Size { + width: w, + height: h, + }); + } + } + _ => {} + } + } + + read_more = !resize_event.is_some() && utf16_buf_len == 0; + } + + const RESIZE_EVENT_FMT_MAX_LEN: usize = 16; // "\x1b[8;65535;65535t" + let resize_event_len = if resize_event.is_some() { + RESIZE_EVENT_FMT_MAX_LEN + } else { + 0 + }; + // +1 to account for a potential `STATE.leading_surrogate`. + let utf8_max_len = (utf16_buf_len + 1) * 3; + let mut text = String::with_capacity(utf8_max_len + resize_event_len); + + if let Some(resize_event) = resize_event { + // If I read xterm's documentation correctly, CSI 18 t reports the window size in characters. + // CSI 8 ; height ; width t is the response. 
Of course, we didn't send the request, + // but we can use this fake response to trigger the editor to resize itself. + _ = write!( + text, + "\x1b[8;{};{}t", + resize_event.height, resize_event.width + ); + } + + // If the input ends with a lone lead surrogate, we need to remember it for the next read. + if utf16_buf_len > 0 { + unsafe { + let last_char = utf16_buf[utf16_buf_len - 1].assume_init(); + if 0xD800 <= last_char && last_char <= 0xDBFF { + STATE.leading_surrogate = last_char; + utf16_buf_len -= 1; + } + } + } + + // Convert the remaining input to UTF8, the sane encoding. + if utf16_buf_len > 0 { + unsafe { + let vec = text.as_mut_vec(); + let spare = vec.spare_capacity_mut(); + + let len = Globalization::WideCharToMultiByte( + Globalization::CP_UTF8, + 0, + utf16_buf[0].as_ptr(), + utf16_buf_len as i32, + spare.as_mut_ptr() as *mut _, + spare.len() as i32, + null(), + null_mut(), + ); + + if len > 0 { + vec.set_len(vec.len() + len as usize); + } + } + } + + Some(text) +} + +pub fn write_stdout(text: &str) { + unsafe { + let mut offset = 0; + + while offset < text.len() { + let ptr = text.as_ptr().add(offset); + let write = (text.len() - offset).min(1024 * 1024 * 1024) as u32; + let mut written = 0; + let ok = FileSystem::WriteFile(STATE.stdout, ptr, write, &mut written, null_mut()); + offset += written as usize; + if ok == 0 || written == 0 { + break; + } + } + } +} + +pub fn open_stdin_if_redirected() -> Option { + unsafe { + let handle = Console::GetStdHandle(Console::STD_INPUT_HANDLE); + match FileSystem::GetFileType(handle) { + FileSystem::FILE_TYPE_DISK | FileSystem::FILE_TYPE_PIPE => { + Some(File::from_raw_handle(handle)) + } + _ => None, + } + } +} + +pub unsafe fn virtual_reserve(size: usize) -> apperr::Result<*mut u8> { + unsafe { + let mut base = null_mut(); + + if cfg!(debug_assertions) { + static mut S_BASE_GEN: usize = 0x0000100000000000; + S_BASE_GEN += 0x0000100000000000; + base = S_BASE_GEN as *mut _; + } + + 
check_ptr_return(Memory::VirtualAlloc( + base, + size, + Memory::MEM_RESERVE, + Memory::PAGE_READWRITE, + ) as *mut u8) + } +} + +pub unsafe fn virtual_release(base: *mut u8, size: usize) { + unsafe { + Memory::VirtualFree(base as *mut _, size, Memory::MEM_RELEASE); + } +} + +pub unsafe fn virtual_commit(base: *mut u8, size: usize) -> apperr::Result<()> { + unsafe { + check_ptr_return(Memory::VirtualAlloc( + base as *mut _, + size, + Memory::MEM_COMMIT, + Memory::PAGE_READWRITE, + )) + .map(|_| ()) + } +} + +unsafe fn load_library(name: *const u16) -> apperr::Result { + unsafe { + check_ptr_return(LibraryLoader::LoadLibraryExW( + name, + null_mut(), + LibraryLoader::LOAD_LIBRARY_SEARCH_SYSTEM32, + )) + } +} + +// It'd be nice to constrain T to std::marker::FnPtr, but that's unstable. +pub unsafe fn get_proc_address(handle: Foundation::HMODULE, name: &CStr) -> apperr::Result { + unsafe { + let ptr = LibraryLoader::GetProcAddress(handle, name.as_ptr() as *const u8); + if let Some(ptr) = ptr { + Ok(mem::transmute_copy(&ptr)) + } else { + Err(get_last_error()) + } + } +} + +pub unsafe fn load_icu() -> apperr::Result { + unsafe { load_library(w!("icu.dll")) } +} + +#[cold] +fn get_last_error() -> apperr::Error { + unsafe { gle_to_apperr(Foundation::GetLastError()) } +} + +#[inline] +fn gle_to_apperr(gle: u32) -> apperr::Error { + unsafe { + apperr::Error::new(if gle == 0 { + 0x8000FFFF + } else { + 0x80070000 | gle + }) + } +} + +#[inline] +pub fn io_error_to_apperr(err: std::io::Error) -> apperr::Error { + gle_to_apperr(err.raw_os_error().unwrap_or(0) as u32) +} + +pub fn format_error(err: apperr::Error) -> String { + unsafe { + let mut ptr: *mut u8 = null_mut(); + let len = Debug::FormatMessageA( + Debug::FORMAT_MESSAGE_ALLOCATE_BUFFER + | Debug::FORMAT_MESSAGE_FROM_SYSTEM + | Debug::FORMAT_MESSAGE_IGNORE_INSERTS, + null(), + err.value() as u32, + 0, + &mut ptr as *mut *mut _ as *mut _, + 0, + null_mut(), + ); + + let mut result = format!("Error {:#08x}", 
err.value()); + + if len > 0 { + let msg = helpers::str_from_raw_parts(ptr, len as usize); + let msg = msg.trim_ascii(); + let msg = msg.replace(['\r', '\n'], " "); + result.push_str(": "); + result.push_str(&msg); + Foundation::LocalFree(ptr as *mut _); + } + + result + } +} + +fn check_bool_return(ret: Foundation::BOOL) -> apperr::Result<()> { + if ret == 0 { + Err(get_last_error()) + } else { + Ok(()) + } +} + +fn check_ptr_return(ret: *mut T) -> apperr::Result<*mut T> { + if ret.is_null() { + Err(get_last_error()) + } else { + Ok(ret) + } +} diff --git a/src/trust_me_bro.rs b/src/trust_me_bro.rs new file mode 100644 index 0000000..f774719 --- /dev/null +++ b/src/trust_me_bro.rs @@ -0,0 +1,7 @@ +pub fn this_lifetime_change_is_totally_safe<'a, T: ?Sized>(x: &T) -> &'a T { + unsafe { std::mem::transmute(x) } +} + +pub fn this_lifetime_change_is_totally_safe_mut<'a, T: ?Sized>(x: &mut T) -> &'a mut T { + unsafe { std::mem::transmute(x) } +} diff --git a/src/tui.rs b/src/tui.rs new file mode 100644 index 0000000..a831efc --- /dev/null +++ b/src/tui.rs @@ -0,0 +1,2958 @@ +use crate::buffer::{CursorMovement, RcTextBuffer}; +use crate::framebuffer::{Framebuffer, INDEXED_COLORS_COUNT, IndexedColor}; +use crate::helpers::{CoordType, Point, Rect, Size, hash, hash_str, wymix}; +use crate::input::{InputKeyMod, kbmod, vk}; +use crate::ucd::Document; +use crate::{helpers, input, trust_me_bro, ucd}; +use std::fmt::Write as _; +use std::iter; +use std::mem; +use std::ptr::{self, null}; + +const ROOT_ID: u64 = 0x14057B7EF767814F; // Knuth's MMIX constant + +#[derive(PartialEq, Eq)] +pub enum Overflow { + Clip, + TruncateHead, + TruncateMiddle, + TruncateTail, +} + +type InputText<'input> = input::InputText<'input>; +type InputKey = input::InputKey; +type InputMouseState = input::InputMouseState; + +struct RefinedInputMouse { + state: InputMouseState, + modifier: u32, + gesture: InputMouseGesture, +} + +enum RefinedInput<'a> { + None, + Text(InputText<'a>), + Keyboard(InputKey), 
+ Mouse(RefinedInputMouse), + Scroll(Point), +} + +#[derive(Clone, Copy, PartialEq, Eq)] +enum InputMouseGesture { + None, + Click, + Drag(Point), +} + +struct CachedTextBuffer { + node_id: u64, + editor: RcTextBuffer, + seen: bool, +} + +pub struct Tui { + framebuffer: Framebuffer, + + /// Last known terminal size. + size: Size, + /// Last known mouse position. + mouse_position: Point, + /// Between mouse down and up, the position where the mouse was pressed. + /// Otherwise, this contains Point::MIN. + mouse_down_position: Point, + /// Last known mouse state. + mouse_state: InputMouseState, + + clipboard: Vec, + cached_text_buffers: Vec, + hovered_node_path: Vec, + focused_node_path: Vec, + + prev_tree: Tree, + prev_node_map: Vec<*const Node>, + prev_node_map_shift: usize, + prev_node_map_mask: u64, + + settling_have: u32, + settling_want: u32, +} + +impl Tui { + pub fn new() -> Self { + let mut tui = Self { + framebuffer: Framebuffer::new(), + + size: Size { + width: 0, + height: 0, + }, + mouse_position: Point::MIN, + mouse_down_position: Point::MIN, + mouse_state: InputMouseState::None, + + clipboard: Vec::new(), + cached_text_buffers: Vec::with_capacity(16), + hovered_node_path: Vec::with_capacity(16), + focused_node_path: Vec::with_capacity(16), + + prev_tree: Tree::new(), + prev_node_map: vec![null(); 1], + prev_node_map_shift: 0, + prev_node_map_mask: 0, + + settling_have: 0, + settling_want: 0, + }; + tui.hovered_node_path.push(ROOT_ID); + tui.focused_node_path.push(ROOT_ID); + tui + } + + pub fn size(&self) -> Size { + self.size + } + + /// Sets up indexed colors for the TUI context. + pub fn setup_indexed_colors(&mut self, colors: [u32; INDEXED_COLORS_COUNT]) { + self.framebuffer.set_indexed_colors(colors); + } + + pub fn create_context<'tui, 'input>( + &'tui mut self, + input: Option>, + ) -> Context<'tui, 'input> { + // In the input handler below we transformed a mouse up into a release event. 
+ // Now, a frame later, we must reset it back to none, to stop it from triggering things. + // Same for Scroll events. + if self.mouse_state > InputMouseState::Right { + self.mouse_state = InputMouseState::None; + self.mouse_down_position = Point::MIN; + } + + let mut input_consumed = false; + let mut input_text = None; + let mut input_keyboard = None; + let mut input_mouse_modifiers = kbmod::NONE; + let mut input_mouse_gesture = InputMouseGesture::None; + let mut input_scroll_delta = Point { x: 0, y: 0 }; + + match input { + None => { + input_consumed = true; + } + Some(input::Input::Resize(resize)) => { + assert!(resize.width > 0 && resize.height > 0); + assert!(resize.width < 32768 && resize.height < 32768); + self.size = resize; + } + Some(input::Input::Text(text)) => { + input_text = Some(text); + if !text.bracketed + && text.text.len() == 1 + && (b'a'..=b'z').contains(&text.text.as_bytes()[0]) + { + let ch = text.text.as_bytes()[0] as u32; + let ch = ch & !0x20; // Uppercase. + input_keyboard = Some(InputKey::new(ch)); + } + } + Some(input::Input::Keyboard(keyboard)) => { + input_keyboard = Some(keyboard); + } + Some(input::Input::Mouse(mouse)) => { + let mut next_state = mouse.state; + let next_position = mouse.position; + let next_scroll = mouse.scroll; + + let mut focused_node = null(); + + for root in self.prev_tree.iterate_roots() { + Tree::visit_all(root, root, 0, true, |_, node| { + if !node.outer_clipped.contains(next_position) { + // Skip the entire sub-tree, because it doesn't contain the cursor. 
+ return VisitControl::SkipChildren; + } + if node.attributes.focusable { + focused_node = node; + } + VisitControl::Continue + }); + } + + let focused_node = unsafe { focused_node.as_ref() }; + Self::build_node_path(focused_node, &mut self.hovered_node_path); + + if self.mouse_state != InputMouseState::None && next_state == InputMouseState::None + { + // When the input transitions from some mouse input to no mouse input, + // we'll emit 1 InputMouseAction::Release event. + next_state = InputMouseState::Release; + } else if self.mouse_state == InputMouseState::None + && next_state == InputMouseState::Left + { + // On left-mouse-down we change focus. + self.focused_node_path = self.hovered_node_path.clone(); + self.mouse_down_position = next_position; + self.needs_more_settling(); // See `needs_more_settling()`. + } + + let next_gesture = if next_state == InputMouseState::Release + && next_position == self.mouse_position + { + // Mouse down and up happened at the same position = Click. + // TODO: This should instead check if the focus stack is the same as on mouse-down. + InputMouseGesture::Click + } else if self.mouse_state == InputMouseState::Left + && next_state == InputMouseState::Left + && next_position != self.mouse_position + { + // Mouse down and moved = Drag. + InputMouseGesture::Drag(Point { + x: next_position.x - self.mouse_position.x, + y: next_position.y - self.mouse_position.y, + }) + } else { + InputMouseGesture::None + }; + + input_mouse_modifiers = mouse.modifiers; + input_mouse_gesture = next_gesture; + input_scroll_delta = next_scroll; + self.mouse_position = next_position; + self.mouse_state = next_state; + } + } + + if input.is_some() { + // Every time there's input, we naturally need to re-render at least once. 
+ self.settling_have = 0; + self.settling_want = 1; + } + + Context { + tui: self, + + time: std::time::Instant::now(), + input_text, + input_keyboard, + input_mouse_modifiers, + input_mouse_gesture, + input_scroll_delta, + input_consumed, + + tree: Tree::new(), + next_block_id_mixin: 0, + needs_settling: false, + } + } + + fn build_node_path(node: Option<&Node>, path: &mut Vec) { + path.clear(); + iter::successors(node, |&node| Tree::node_ref(node.parent)).for_each(|node| { + path.push(node.id); + }); + if path.is_empty() { + path.push(ROOT_ID); + } + } + + fn report_context_completion(&mut self, ctx: &mut Context) { + // If this hits, you forgot to block_end() somewhere. The best way to figure + // out where is to do a binary search of commenting out code in main.rs. + debug_assert!( + Tree::node_ref(ctx.tree.current_node) + .map(|r| r.stack_parent) + .unwrap_or(null()) + .is_null() + ); + + // If nodes have appeared or disappeared, we need to re-render. + // Same, if the focus has changed (= changes the highlight color, etc.). + let mut needs_settling = ctx.needs_settling; + needs_settling |= self.prev_tree.checksum != ctx.tree.checksum; + + // Adopt the new tree and recalculate the node hashmap. 
+ self.prev_tree = mem::take(&mut ctx.tree); + { + let width = (4 * self.prev_tree.count + 1).ilog2().max(1) as usize; + let shift = std::mem::size_of::() * 8 - width; + let slots = 1 << width; + let mask = slots - 1; + let node_map = &mut self.prev_node_map; + + if slots != node_map.len() { + *node_map = vec![null(); slots]; + } else { + node_map.fill(null()); + } + + let mut node = self.prev_tree.root_first; + while !node.is_null() { + let n = unsafe { &*node }; + let mut slot = (n.id >> shift) as usize; + loop { + if node_map[slot].is_null() { + node_map[slot] = n; + break; + } + slot = (slot + 1) & mask; + } + node = n.next; + } + + self.prev_node_map_shift = shift; + self.prev_node_map_mask = mask as u64; + } + + let mut focus_path_pop_min = 0; + // If the user pressed Escape, we move the focus to a parent node. + if !ctx.input_consumed && ctx.consume_shortcut(vk::ESCAPE) { + focus_path_pop_min = 1; + } + + // Remove any unknown nodes from the focus path. + // It's important that we do this after the tree has been swapped out, + // so that pop_focusable_node() has access to the newest version of the tree. + let focus_path_changed = self.pop_focusable_node(focus_path_pop_min); + needs_settling |= focus_path_changed; + + // If some elements went away and the focus path changed above, we ignore tab presses. + // It may otherwise lead to weird situations where focus moves unexpectedly. + if !focus_path_changed && !ctx.input_consumed && ctx.input_keyboard.is_some() { + needs_settling |= self.move_focus(ctx.input_keyboard.unwrap()); + } + + if needs_settling { + self.needs_more_settling(); + } + + // Remove cached text editors that are no longer in use. 
+ self.cached_text_buffers.retain(|c| c.seen); + + for root in Tree::iterate_siblings(self.prev_tree.root_first) { + root.compute_intrinsic_size(); + } + + let viewport = self.size.as_rect(); + + for root in Tree::iterate_siblings(self.prev_tree.root_first) { + if let Some(float) = &root.attributes.float { + let anchor = Tree::node_ref(root.parent).unwrap(); + let mut x = anchor.outer.left; + let mut y = anchor.outer.top; + let size = root.intrinsic_to_outer(); + + x += float.offset.x; + y += float.offset.y; + x -= (float.gravity_x * size.width as f32 + 0.5f32) as CoordType; + y -= (float.gravity_y * size.height as f32 + 0.5f32) as CoordType; + + root.outer.left = x; + root.outer.top = y; + root.outer.right = x + size.width; + root.outer.bottom = y + size.height; + root.outer = root.outer.intersect(viewport); + } else { + root.outer = viewport; + } + + root.inner = root.outer_to_inner(root.outer); + root.outer_clipped = root.outer; + root.inner_clipped = root.inner; + root.layout_children(root.outer); + } + } + + /// After you finished processing all input, continue redrawing your UI until this returns false. + pub fn needs_settling(&mut self) -> bool { + self.settling_have += 1; + self.settling_have <= self.settling_want + } + + fn needs_more_settling(&mut self) { + // If the focus has changed, the new node may need to be re-rendered. + // Same, every time we encounter a previously unknown node via `get_prev_node`, + // because that means it likely failed to get crucial information such as the layout size. + // + // But we put a maximum on how many times we'll re-render in a row, in order + // to prevent accidental infinite loops. Honestly, anything >2 would be weird. + debug_assert!(self.settling_want < 5); + self.settling_want = (self.settling_want + 1).min(10); + } + + /// Renders all nodes into a string-frame representation. 
+ pub fn render(&mut self) -> String { + self.framebuffer.reset(self.size); + for child in self.prev_tree.iterate_roots() { + self.render_node(child); + } + self.framebuffer.render() + } + + /// Recursively renders each node and its children. + #[allow(clippy::only_used_in_recursion)] + fn render_node(&mut self, node: &mut Node) { + let outer_clipped = node.outer_clipped; + if outer_clipped.is_empty() { + return; + } + + if node.attributes.bordered { + // ┌────┐ + { + let mut fill = String::new(); + fill.push('┌'); + helpers::string_append_repeat( + &mut fill, + '─', + (outer_clipped.right - outer_clipped.left - 2) as usize, + ); + fill.push('┐'); + self.framebuffer.replace_text( + outer_clipped.top, + outer_clipped.left, + outer_clipped.right, + &fill, + ); + } + + // │ │ + { + let mut fill = String::new(); + fill.push('│'); + helpers::string_append_repeat( + &mut fill, + ' ', + (outer_clipped.right - outer_clipped.left - 2) as usize, + ); + fill.push('│'); + + for y in outer_clipped.top + 1..outer_clipped.bottom - 1 { + self.framebuffer.replace_text( + y, + outer_clipped.left, + outer_clipped.right, + &fill, + ); + } + } + + // └────┘ + { + let mut fill = String::new(); + fill.push('└'); + helpers::string_append_repeat( + &mut fill, + '─', + (outer_clipped.right - outer_clipped.left - 2) as usize, + ); + fill.push('┘'); + self.framebuffer.replace_text( + outer_clipped.bottom - 1, + outer_clipped.left, + outer_clipped.right, + &fill, + ); + } + } else if node.attributes.float.is_some() { + let mut fill = String::new(); + helpers::string_append_repeat( + &mut fill, + ' ', + (outer_clipped.right - outer_clipped.left) as usize, + ); + + for y in outer_clipped.top..outer_clipped.bottom { + self.framebuffer + .replace_text(y, outer_clipped.left, outer_clipped.right, &fill); + } + } + + if node.attributes.focus_brackets { + let has_focus = self.is_node_focused(node.id); + let center_y = (outer_clipped.top + outer_clipped.bottom) / 2; + self.framebuffer.replace_text( + 
center_y, + node.outer.left, + node.outer.left + 1, + if has_focus { ">" } else { "[" }, + ); + self.framebuffer.replace_text( + center_y, + node.outer.right - 1, + node.outer.right, + if has_focus { "<" } else { "]" }, + ); + } + + { + let mut rect = outer_clipped; + if node.attributes.focus_brackets { + rect.left += 1; + rect.right -= 1; + } + self.framebuffer.blend_bg(rect, node.attributes.bg); + self.framebuffer.blend_fg(rect, node.attributes.fg); + } + + let inner = node.inner; + let inner_clipped = node.inner_clipped; + if inner_clipped.is_empty() { + return; + } + + match &mut node.content { + NodeContent::Modal(title) => { + self.framebuffer.replace_text( + node.outer.top, + node.outer.left + 2, + node.outer.right - 1, + title, + ); + } + NodeContent::Text(content) => { + if !inner_clipped.is_empty() { + if content.overflow != Overflow::Clip + && node.intrinsic_size.width > inner.width() + // TODO: Implement ellipsis support for text with multiple chunks. + && content.chunks.len() == 1 + { + let actual_width = node.intrinsic_size.width; + let restricted_width = inner.width(); + let chunk = &content.chunks[0]; + let text = &chunk.text[..]; + let bytes = text.as_bytes(); + let mut modified = String::with_capacity(text.len()); + let mut cfg = ucd::MeasurementConfig::new(&bytes); + + match content.overflow { + Overflow::Clip => unreachable!(), + Overflow::TruncateHead => { + modified.push('…'); + let beg = cfg.goto_visual(Point { + x: actual_width - restricted_width + 1, + y: 0, + }); + modified.push_str(&text[beg.offset..]); + } + Overflow::TruncateMiddle => { + let mid_beg_x = restricted_width / 2; + let mid_end_x = actual_width - mid_beg_x; + let beg = cfg.goto_visual(Point { x: mid_beg_x, y: 0 }); + let mut end = cfg.goto_visual(Point { x: mid_end_x, y: 0 }); + if end.visual_pos.x < mid_end_x { + // If we intersected a wide glyph, we need to move past that. 
+ end = cfg.goto_logical(Point { + x: end.logical_pos.x + 1, + y: 0, + }); + } + modified.push_str(&text[..beg.offset]); + modified.push('…'); + modified.push_str(&text[end.offset..]); + } + Overflow::TruncateTail => { + let end = cfg.goto_visual(Point { + x: restricted_width - 1, + y: 0, + }); + modified.push_str(&text[..end.offset]); + modified.push('…'); + } + } + + let rect = self.framebuffer.replace_text( + inner_clipped.top, + inner_clipped.left, + inner_clipped.right, + &modified, + ); + self.framebuffer.blend_fg(rect, chunk.fg); + } else { + let mut beg_x = inner.left; + for chunk in &content.chunks { + let rect = self.framebuffer.replace_text( + inner_clipped.top, + beg_x, + inner_clipped.right, + &chunk.text, + ); + self.framebuffer.blend_fg(rect, chunk.fg); + beg_x = rect.right; + } + } + } + } + NodeContent::Textarea(content) => { + let tb = &mut *content.buffer; + let mut destination = Rect { + left: inner_clipped.left, + top: inner_clipped.top, + right: inner_clipped.right, + bottom: inner_clipped.bottom, + }; + + if !content.single_line { + // Account for the scrollbar. + destination.right -= 1; + } + + tb.render( + content.scroll_offset, + destination, + content.has_focus, + &mut self.framebuffer, + ); + + if !content.single_line { + // Render the scrollbar. 
+ let track = Rect { + left: inner_clipped.right - 1, + top: inner_clipped.top, + right: inner_clipped.right, + bottom: inner_clipped.bottom, + }; + self.framebuffer.draw_scrollbar( + inner_clipped, + track, + content.scroll_offset.y, + tb.get_visual_line_count() + inner.height() - 1, + ); + } + } + NodeContent::Scrollarea(pos) => { + let content = Tree::node_ref(node.children.first).unwrap(); + let track = Rect { + left: inner.right, + top: inner.top, + right: inner.right + 1, + bottom: inner.bottom, + }; + self.framebuffer.draw_scrollbar( + outer_clipped, + track, + pos.y, + content.intrinsic_size.height, + ); + } + _ => {} + } + + for child in Tree::iterate_siblings(node.children.first) { + self.render_node(child); + } + } + + /// Outputs a debug string of the layout and focus tree. + pub fn debug_layout(&mut self) -> String { + let mut result = String::new(); + result.push_str("general:\r\n- focus_path:\r\n"); + + for &id in self.focused_node_path.iter().rev() { + _ = write!(result, " - {:016x}\r\n", id); + } + + result.push_str("\r\ntree:\r\n"); + + for root in self.prev_tree.iterate_roots() { + Tree::visit_all(root, root, 0, true, |depth, node| { + helpers::string_append_repeat(&mut result, ' ', depth * 2); + _ = write!(result, "- id: {:016x}\r\n", node.id); + + helpers::string_append_repeat(&mut result, ' ', depth * 2); + _ = write!(result, " classname: {}\r\n", node.classname); + + if depth == 0 { + if let Some(parent) = Tree::node_ref(node.parent) { + helpers::string_append_repeat(&mut result, ' ', depth * 2); + _ = write!(result, " parent: {:016x}\r\n", parent.id); + } + } + + helpers::string_append_repeat(&mut result, ' ', depth * 2); + _ = write!( + result, + " intrinsic: {{{}, {}}}\r\n", + node.intrinsic_size.width, node.intrinsic_size.height + ); + + helpers::string_append_repeat(&mut result, ' ', depth * 2); + _ = write!( + result, + " outer: {{{}, {}, {}, {}}}\r\n", + node.outer.left, node.outer.top, node.outer.right, node.outer.bottom + ); + + 
helpers::string_append_repeat(&mut result, ' ', depth * 2); + _ = write!( + result, + " inner: {{{}, {}, {}, {}}}\r\n", + node.inner.left, node.inner.top, node.inner.right, node.inner.bottom + ); + + if node.attributes.bordered { + helpers::string_append_repeat(&mut result, ' ', depth * 2); + result.push_str(" bordered: true\r\n"); + } + + if node.attributes.bg != 0 { + helpers::string_append_repeat(&mut result, ' ', depth * 2); + _ = write!(result, " bg: #{:08x}\r\n", node.attributes.bg); + } + + if node.attributes.fg != 0 { + helpers::string_append_repeat(&mut result, ' ', depth * 2); + _ = write!(result, " fg: #{:08x}\r\n", node.attributes.fg); + } + + if self.is_node_focused(node.id) { + helpers::string_append_repeat(&mut result, ' ', depth * 2); + result.push_str(" focused: true\r\n"); + } + + match &node.content { + NodeContent::Text(content) => { + helpers::string_append_repeat(&mut result, ' ', depth * 2); + _ = write!( + result, + " text: \"{}\"\r\n", + content + .chunks + .iter() + .map(|c| c.text.as_str()) + .collect::() + ); + } + NodeContent::Textarea(content) => { + let tb = &*content.buffer; + helpers::string_append_repeat(&mut result, ' ', depth * 2); + _ = write!(result, " textarea: {tb:p}\r\n"); + } + NodeContent::Scrollarea(..) => { + helpers::string_append_repeat(&mut result, ' ', depth * 2); + result.push_str(" scrollable: true\r\n"); + } + _ => {} + } + + VisitControl::Continue + }); + } + + result + } + + /// Checks if the pointer is on the current node's boundary (hover). + pub fn is_node_hovered(&mut self, id: u64) -> bool { + // We construct the hovered_node_path always with at least 1 element (the root id). + unsafe { *self.hovered_node_path.get_unchecked(0) == id } + } + + /// Checks if a node's subtree contains the hover path. + pub fn is_subtree_hovered(&mut self, id: u64) -> bool { + self.hovered_node_path.contains(&id) + } + + /// Checks if a node with the given ID has input focus. 
+ fn is_node_focused(&self, id: u64) -> bool { + // We construct the focused_node_path always with at least 1 element (the root id). + unsafe { *self.focused_node_path.get_unchecked(0) == id } + } + + /// Checks if a node's subtree contains the focus path. + fn is_subtree_focused(&self, id: u64) -> bool { + self.focused_node_path.contains(&id) + } + + fn get_prev_node<'b>(&mut self, id: u64) -> Option<&'b Node> { + let node_map = &self.prev_node_map[..]; + let shift = self.prev_node_map_shift; + let mask = self.prev_node_map_mask; + let mut slot = (id >> shift) & mask; + + loop { + let node = node_map[slot as usize]; + if node.is_null() { + return None; + } + if unsafe { &*node }.id == id { + return Some(unsafe { &*(node as *const _) }); + } + slot = (slot + 1) & mask; + } + } + + fn pop_focusable_node(&mut self, pop_minimum: usize) -> bool { + let pop_minimum = pop_minimum.min(self.focused_node_path.len()); + let path = &self.focused_node_path[pop_minimum..self.focused_node_path.len()]; + + // TODO: This is only needed because get_prev_node doesn't live on a property of `self`. + // That would fix the borrow checker issue. + let path = trust_me_bro::this_lifetime_change_is_totally_safe(path); + + // Find the next focusable node upwards in the hierarchy. + let focusable_idx = path + .iter() + .position(|&id| { + self.get_prev_node(id) + .is_some_and(|node| node.attributes.focusable) + }) + .unwrap_or(path.len()); + + let keep_idx = focusable_idx + pop_minimum; + + if keep_idx == 0 { + // Nothing to remove. + false + } else { + // Remove all the nodes between us and the next focusable node. 
+ self.focused_node_path.drain(..focusable_idx + pop_minimum); + if self.focused_node_path.is_empty() { + self.focused_node_path.push(ROOT_ID); + } + true + } + } + + fn move_focus(&mut self, input: InputKey) -> bool { + const SHIFT_TAB: InputKey = vk::TAB.with_modifiers(kbmod::SHIFT); + + let Some(focused) = self.get_prev_node(self.focused_node_path[0]) else { + debug_assert!(false); // The caller should've cleaned up the focus path. + return false; + }; + + if input == vk::LEFT || input == vk::RIGHT { + if let Some(row) = Tree::node_ref(focused.parent) { + if let Some(table) = Tree::node_ref(row.parent) { + if matches!(table.content, NodeContent::Table(..)) { + let mut next = if input == vk::LEFT { + focused.siblings.prev + } else { + focused.siblings.next + }; + if next.is_null() { + next = if input == vk::LEFT { + row.children.last + } else { + row.children.first + }; + } + let next = Tree::node_ref(next).unwrap(); + if !ptr::eq(next, focused) { + Tui::build_node_path(Some(next), &mut self.focused_node_path); + return true; + } + } + } + } + } + + if input == vk::UP || input == vk::DOWN { + if let Some(list) = Tree::node_ref(focused.parent) { + if matches!(list.content, NodeContent::List) { + let mut next = if input == vk::UP { + focused.siblings.prev + } else { + focused.siblings.next + }; + if next.is_null() { + next = if input == vk::UP { + list.children.last + } else { + list.children.first + }; + } + let next = Tree::node_ref(next).unwrap(); + if !ptr::eq(next, focused) { + Tui::build_node_path(Some(next), &mut self.focused_node_path); + return true; + } + } + } + } + + let forward = match input { + SHIFT_TAB => false, + vk::TAB => true, + _ => return false, + }; + + // Figure out the container within which the focuse must be contained. + // This way, tab/shift-tab only moves within the same window. + // The ROOT_ID node has no parent, and the others have a float attribute. + // If the window is the focused node, it should of course not move upward. 
+ let mut root = focused; + while !root.parent.is_null() && root.attributes.float.is_none() { + root = Tree::node_ref(root.parent).unwrap(); + } + + let mut last_node_in_window = root; + while !last_node_in_window.children.last.is_null() { + last_node_in_window = unsafe { &*last_node_in_window.children.last }; + } + + // If the window doesn't contain any nodes, there's nothing to focus. + // This also protects against infinite loops below. + if ptr::eq(root, last_node_in_window) { + return false; + } + + if !focused.attributes.focusable { + debug_assert!(false); + return false; + } + + let mut focused_next = focused as *const _; + Tree::visit_all(root, focused, usize::MAX / 2, forward, |_, node| { + if node.attributes.focusable && !ptr::eq(node, root) && !ptr::eq(node, focused) { + focused_next = node; + VisitControl::Stop + } else { + VisitControl::Continue + } + }); + + if ptr::eq(focused_next, focused) { + false + } else { + Tui::build_node_path( + unsafe { focused_next.as_ref() }, + &mut self.focused_node_path, + ); + true + } + } +} + +pub struct Context<'tui, 'input> { + tui: &'tui mut Tui, + + time: std::time::Instant, + /// Current text input, if any. + input_text: Option>, + /// Current keyboard input, if any. + input_keyboard: Option, + input_mouse_modifiers: InputKeyMod, + input_mouse_gesture: InputMouseGesture, + /// By how much the mouse wheel was scrolled since the last frame. + input_scroll_delta: Point, + input_consumed: bool, + + tree: Tree, + next_block_id_mixin: u64, + needs_settling: bool, +} + +impl Drop for Context<'_, '_> { + fn drop(&mut self) { + let this = self as *mut _; + self.tui.report_context_completion(unsafe { &mut *this }); + } +} + +impl Context<'_, '_> { + /// Returns the current terminal size. 
+ pub fn size(&self) -> Size { + self.tui.size + } + + pub fn indexed(&self, index: IndexedColor) -> u32 { + self.tui.framebuffer.indexed(index) + } + + pub fn set_clipboard(&mut self, data: Vec) { + self.tui.clipboard = data; + } + + pub fn get_clipboard(&self) -> &[u8] { + &self.tui.clipboard + } + + /// Begins a new UI block (container) with a unique ID. + pub fn block_begin(&mut self, classname: &'static str) { + let parent = self.tree.current_node_mut(); + + let mut id = hash_str(parent.id, classname); + if self.next_block_id_mixin != 0 { + id = hash(id, &self.next_block_id_mixin.to_ne_bytes()); + self.next_block_id_mixin = 0; + } + + // If this hits, you have tried to create a block with the same ID as a previous one + // somewhere up this call stack. Change the classname, or use next_block_id_mixin(). + #[cfg(debug_assertions)] + for child in Tree::iterate_siblings(parent.children.first) { + debug_assert_ne!(child.id, id); + } + + self.tree.append_child(Node { + stack_parent: parent, + id, + classname, + parent, + ..Default::default() + }); + } + + /// Ends the current UI block, returning to its parent container. + pub fn block_end(&mut self) { + self.tree.pop_stack(); + } + + /// Mixes in an extra value to the next UI block's ID for uniqueness. + pub fn next_block_id_mixin(&mut self, id: u64) { + self.next_block_id_mixin = id; + } + + pub fn focus_on_first_present(&mut self) { + let last_node = self.tree.last_node_mut(); + last_node.attributes.focusable = true; + if self.tui.get_prev_node(last_node.id).is_none() { + self.steal_focus(); + } + } + + pub fn steal_focus(&mut self) { + let last_node = self.tree.last_node_ref(); + self.needs_settling = true; + Tui::build_node_path(Some(last_node), &mut self.tui.focused_node_path); + } + + pub fn toss_focus_up(&mut self) { + let current_node = self.tree.current_node_ref(); + // Check the path length to avoid popping the root node and scheduling a rerender for no reason. 
+ if current_node.attributes.focusable + && self.tui.focused_node_path.len() >= 2 + && self.tui.is_node_focused(current_node.id) + { + self.tui.pop_focusable_node(1); + self.needs_settling = true; + } + } + + pub fn inherit_focus(&mut self) { + let last_node = self.tree.last_node_mut(); + let Some(parent) = Tree::node_mut(last_node.parent) else { + return; + }; + + last_node.attributes.focusable = true; + // Mark the parent as focusable, so that if the user presses Escape, + // and `block_end` bubbles the focus up the tree, it'll stop on our parent, + // which will then focus us on the next iteration. + parent.attributes.focusable = true; + + if self.tui.is_node_focused(parent.id) { + self.needs_settling = true; + self.tui.focused_node_path.insert(0, last_node.id); + } + } + + fn attr_focusable(&mut self) { + let last_node = self.tree.last_node_mut(); + last_node.attributes.focusable = true; + } + + pub fn attr_intrinsic_size(&mut self, size: Size) { + let last_node = self.tree.last_node_mut(); + last_node.intrinsic_size = size; + last_node.intrinsic_size_set = true; + } + + pub fn attr_float(&mut self, spec: FloatSpec) { + let last_node = self.tree.last_node_mut(); + let anchor = match spec.anchor { + Anchor::Last if !last_node.siblings.prev.is_null() => last_node.siblings.prev, + Anchor::Last | Anchor::Parent => last_node.parent, + Anchor::Root => self.tree.root_first, + }; + + // Remove the node from the UI tree and insert it into the floater list. 
+ last_node.remove_from_parent(); + last_node.siblings.prev = self.tree.root_last; + if let Some(root) = Tree::node_mut(self.tree.root_last) { + root.siblings.next = last_node; + self.tree.root_last = last_node; + } + + last_node.parent = anchor; + last_node.attributes.float = Some(FloatAttributes { + gravity_x: spec.gravity_x.clamp(0.0, 1.0), + gravity_y: spec.gravity_y.clamp(0.0, 1.0), + offset: Point { + x: spec.offset_x, + y: spec.offset_y, + }, + }); + } + + pub fn attr_border(&mut self) { + let last_node = self.tree.last_node_mut(); + last_node.attributes.bordered = true; + } + + pub fn attr_alignment(&mut self, align: Alignment) { + let last_node = self.tree.last_node_mut(); + last_node.attributes.align = align; + } + + pub fn attr_padding(&mut self, padding: Rect) { + let last_node = self.tree.last_node_mut(); + last_node.attributes.padding = Self::normalize_rect(padding); + } + + fn normalize_rect(rect: Rect) -> Rect { + Rect { + left: rect.left.max(0), + top: rect.top.max(0), + right: rect.right.max(0), + bottom: rect.bottom.max(0), + } + } + + pub fn attr_background_rgba(&mut self, bg: u32) { + let last_node = self.tree.last_node_mut(); + last_node.attributes.bg = bg; + } + + pub fn attr_foreground_rgba(&mut self, fg: u32) { + let last_node = self.tree.last_node_mut(); + last_node.attributes.fg = fg; + } + + pub fn consume_shortcut(&mut self, shortcut: InputKey) -> bool { + if !self.input_consumed && self.input_keyboard == Some(shortcut) { + self.set_input_consumed(); + true + } else { + false + } + } + + fn set_input_consumed(&mut self) { + debug_assert!(!self.input_consumed); + self.input_consumed = true; + } + + pub fn is_hovering(&mut self) -> bool { + let last_node = self.tree.last_node_ref(); + self.tui.is_node_hovered(last_node.id) + } + + pub fn is_focused(&mut self) -> bool { + let last_node = self.tree.last_node_ref(); + self.tui.is_node_focused(last_node.id) + } + + pub fn contains_focus(&mut self) -> bool { + let last_node = 
self.tree.last_node_ref(); + self.tui.is_subtree_focused(last_node.id) + } + + pub fn modal_begin(&mut self, classname: &'static str, title: &str) { + self.block_begin(classname); + self.focus_on_first_present(); + self.attr_border(); + self.attr_background_rgba(self.indexed(IndexedColor::White)); + self.attr_float(FloatSpec { + anchor: Anchor::Root, + gravity_x: 0.5, + gravity_y: 0.5, + offset_x: self.tui.size.width / 2, + offset_y: self.tui.size.height / 2, + }); + + let last_node = self.tree.last_node_mut(); + last_node.content = NodeContent::Modal(title.to_string()); + } + + pub fn modal_end(&mut self) -> bool { + self.block_end(); + !self.contains_focus() || self.consume_shortcut(vk::ESCAPE) + } + + pub fn table_begin(&mut self, classname: &'static str) { + self.block_begin(classname); + + let last_node = self.tree.last_node_mut(); + last_node.content = NodeContent::Table(TableContent { + columns: Vec::new(), + cell_gap: Size::default(), + }); + } + + pub fn table_set_columns(&mut self, columns: &[i32]) { + let last_node = self.tree.last_node_mut(); + if let NodeContent::Table(spec) = &mut last_node.content { + spec.columns = columns.to_vec(); + } else { + debug_assert!(false); + } + } + + pub fn table_set_cell_gap(&mut self, cell_gap: Size) { + let last_node = self.tree.last_node_mut(); + if let NodeContent::Table(spec) = &mut last_node.content { + spec.cell_gap = cell_gap; + } else { + debug_assert!(false); + } + } + + pub fn table_next_row(&mut self) { + let current_node = self.tree.current_node_ref(); + + // If this is the first call to table_next_row() inside a new table, the + // current_node will refer to the table. Otherwise, it'll refer to the current row. + if !matches!(current_node.content, NodeContent::Table(_)) { + let Some(parent) = Tree::node_ref(current_node.parent) else { + return; + }; + + // Neither the current nor its parent nodes are a table? + // You definitely called this outside of a table block. 
+ debug_assert!(matches!(parent.content, NodeContent::Table(_))); + + self.block_end(); + self.next_block_id_mixin(parent.child_count as u64); + } + + self.block_begin("row"); + } + + pub fn table_end(&mut self) { + let current_node = self.tree.current_node_ref(); + + // If this is the first call to table_next_row() inside a new table, the + // current_node will refer to the table. Otherwise, it'll refer to the current row. + if !matches!(current_node.content, NodeContent::Table(_)) { + self.block_end(); + } + + self.block_end(); // table + } + + pub fn label(&mut self, classname: &'static str, overflow: Overflow, text: &str) { + self.styled_label_begin(classname, overflow); + self.styled_label_add_text(text); + self.styled_label_end(); + } + + pub fn styled_label_begin(&mut self, classname: &'static str, overflow: Overflow) { + self.block_begin(classname); + self.tree.last_node_mut().content = NodeContent::Text(TextContent { + chunks: Vec::new(), + overflow, + }); + } + + pub fn styled_label_set_foreground_indexed(&mut self, index: Option) { + let fg = index.map(|i| self.indexed(i)).unwrap_or(0); + if let Some(chunk) = self.styled_label_get_last_chunk(true) { + chunk.fg = fg; + } + } + + pub fn styled_label_add_text(&mut self, text: &str) { + if let Some(chunk) = self.styled_label_get_last_chunk(false) { + chunk.text.push_str(text); + } + } + + fn styled_label_get_last_chunk(&mut self, flush: bool) -> Option<&mut StyledTextChunk> { + let last_node = self.tree.last_node_mut(); + let NodeContent::Text(content) = &mut last_node.content else { + // You called styled_label_*() outside of a styled_label_*() block.
+ debug_assert!(false); + return None; + }; + + if content.chunks.is_empty() || (flush && !content.chunks.last().unwrap().text.is_empty()) { + content.chunks.push(StyledTextChunk { + text: String::new(), + fg: 0, + }); + } + + content.chunks.last_mut() + } + + pub fn styled_label_end(&mut self) { + let last_node = self.tree.last_node_mut(); + let NodeContent::Text(content) = &last_node.content else { + return; + }; + + let cursor = ucd::MeasurementConfig::new(&content.chunks).goto_visual(Point { + x: CoordType::MAX, + y: 0, + }); + last_node.intrinsic_size.width = cursor.visual_pos.x; + last_node.intrinsic_size.height = 1; + last_node.intrinsic_size_set = true; + + self.block_end(); + } + + pub fn button(&mut self, classname: &'static str, overflow: Overflow, text: &str) -> bool { + self.label(classname, overflow, text); + + let last_node = self.tree.last_node_mut(); + last_node.attributes.focusable = true; + last_node.attributes.focus_brackets = true; + + self.button_activated() + } + + pub fn checkbox( + &mut self, + classname: &'static str, + overflow: Overflow, + text: &str, + checked: &mut bool, + ) -> bool { + self.styled_label_begin(classname, overflow); + self.styled_label_add_text(if *checked { "▣ " } else { "☐ " }); + self.styled_label_add_text(text); + self.styled_label_end(); + + let last_node = self.tree.last_node_mut(); + last_node.attributes.focusable = true; + last_node.attributes.focus_brackets = true; + + let activated = self.button_activated(); + if activated { + *checked = !*checked; + } + activated + } + + fn button_activated(&mut self) -> bool { + if !self.input_consumed + && ((self.input_mouse_gesture == InputMouseGesture::Click && self.is_hovering()) + || self.input_keyboard == Some(vk::RETURN) + || self.input_keyboard == Some(vk::SPACE)) + && self.is_focused() + { + self.set_input_consumed(); + true + } else { + false + } + } + + pub fn editline<'a, 'b: 'a>(&'a mut self, classname: &'static str, text: &'b mut String) { + 
self.block_begin(classname); + + let node = self.tree.current_node_mut(); + let cached; + if let Some(buffer) = self + .tui + .cached_text_buffers + .iter_mut() + .find(|t| t.node_id == node.id) + { + cached = buffer; + cached.seen = true; + } else { + self.tui.cached_text_buffers.push(CachedTextBuffer { + node_id: node.id, + editor: RcTextBuffer::new(true).unwrap(), + seen: true, + }); + cached = self.tui.cached_text_buffers.last_mut().unwrap(); + } + + let mut buffer = cached.editor.clone(); + buffer.copy_from_str(text); + + self.textarea_internal(buffer.clone(), true); + + if buffer.is_dirty() { + buffer.save_as_string(text); + } + + self.block_end(); + } + + pub fn textarea(&mut self, classname: &'static str, tb: RcTextBuffer) { + self.block_begin(classname); + self.textarea_internal(tb, false); + self.block_end(); + } + + fn textarea_internal(&mut self, buffer: RcTextBuffer, single_line: bool) { + self.attr_focusable(); + + let node = self.tree.last_node_mut(); + node.attributes.bg = self.indexed(IndexedColor::DefaultBackground); + node.attributes.fg = self.indexed(IndexedColor::DefaultForeground); + node.attributes.focus_brackets = single_line; + + let mut content = TextareaContent { + buffer, + scroll_offset: Point::default(), + last_click: self.time, + preferred_column: 0, + single_line, + has_focus: self.tui.is_node_focused(node.id), + }; + + if let Some(node_prev) = self.tui.get_prev_node(node.id) { + if let NodeContent::Textarea(content_prev) = &node_prev.content { + content.scroll_offset = content_prev.scroll_offset; + content.preferred_column = content_prev.preferred_column; + content.last_click = content_prev.last_click; + + let mut text_width = node_prev.inner.width(); + if !single_line { + // Subtract -1 to account for the scrollbar. 
+ text_width -= 1; + } + + let mut make_cursor_visible = content.buffer.take_cursor_visibility_request(); + make_cursor_visible |= content.buffer.set_width(text_width); + make_cursor_visible |= + self.textarea_handle_input(&mut content, node_prev, single_line); + + if make_cursor_visible { + self.textarea_make_cursor_visible(&mut content, node_prev); + } + } else { + debug_assert!(false); + } + } + + self.textarea_adjust_scroll_offset(&mut content); + + node.intrinsic_size.height = content.buffer.get_visual_line_count(); + node.intrinsic_size_set = true; + node.content = NodeContent::Textarea(content); + } + + fn textarea_handle_input( + &mut self, + content: &mut TextareaContent, + node_prev: &Node, + single_line: bool, + ) -> bool { + if self.input_consumed { + return false; + } + + let tb = &mut *content.buffer; + + if self.tui.mouse_state != InputMouseState::None { + let mut make_cursor_visible = false; + + match self.tui.mouse_state { + InputMouseState::Left => { + if self.tui.is_node_hovered(node_prev.id) { + let pos = Point { + x: self.tui.mouse_position.x + - node_prev.inner.left + - tb.get_margin_width() + + content.scroll_offset.x, + y: self.tui.mouse_position.y - node_prev.inner.top + + content.scroll_offset.y, + }; + + match self.input_mouse_gesture { + InputMouseGesture::Drag(delta) => { + let track_rect = Rect { + left: node_prev.inner.right - 1, + top: node_prev.inner.top, + right: node_prev.inner.right, + bottom: node_prev.inner.bottom, + }; + if track_rect.contains(self.tui.mouse_down_position) { + let track_height = track_rect.height(); + if track_height > 0 { + // The textarea supports 1 height worth of "scrolling beyond the end". + // `track_height` is the same as the viewport height. 
+ let content_height = + tb.get_visual_line_count() + track_height; + content.scroll_offset.y += + delta.y * content_height / track_height; + } + } else { + tb.selection_update_visual(pos); + } + } + _ => { + if pos == tb.get_cursor_visual_pos() { + if self.time - content.last_click + < std::time::Duration::from_millis(500) + { + tb.select_word(); + } + } else if self.input_mouse_modifiers.contains(kbmod::SHIFT) { + // TODO: Untested because Windows Terminal surprisingly doesn't support Shift+Click. + tb.selection_update_visual(pos); + } else { + tb.cursor_move_to_visual(pos); + } + content.last_click = self.time; + make_cursor_visible = true; + } + } + + content.preferred_column = tb.get_cursor_visual_pos().x; + self.set_input_consumed(); + } + } + InputMouseState::Release => { + if self.tui.is_node_hovered(node_prev.id) { + tb.selection_finalize(); + self.set_input_consumed(); + } + } + InputMouseState::Scroll => { + if self.tui.is_node_hovered(node_prev.id) { + content.scroll_offset.x += self.input_scroll_delta.x; + content.scroll_offset.y += self.input_scroll_delta.y; + self.set_input_consumed(); + } + } + _ => {} + } + + return make_cursor_visible; + } + + if !content.has_focus { + return false; + } + + if let Some(input) = &self.input_text { + let mut text = input.text.as_bytes(); + if single_line { + let (end, _) = ucd::newlines_forward(text, 0, 0, 1); + text = ucd::strip_newline(&text[..end]); + } + + tb.write(text); + + content.preferred_column = tb.get_cursor_visual_pos().x; + self.set_input_consumed(); + return true; + } + + if let Some(input) = &self.input_keyboard { + let key = input.key(); + let modifiers = input.modifiers(); + let mut make_cursor_visible = true; + + match key { + vk::BACK => { + let granularity = if modifiers == kbmod::CTRL { + CursorMovement::Word + } else { + CursorMovement::Grapheme + }; + tb.delete(granularity, -1); + } + vk::TAB => { + if single_line { + // If this is just a simple input field, don't consume Tab (= early 
return). + return false; + } + tb.write(b"\t"); + } + vk::RETURN => { + if single_line { + // If this is just a simple input field, don't consume Enter (= early return). + return false; + } + tb.write(b"\n"); + } + vk::ESCAPE => { + // If there was a selection, clear it and show the cursor (= fallthrough). + if !tb.clear_selection() { + if single_line { + // If this is just a simple input field, don't consume the escape key + // (early return) and don't show the cursor (= return false). + return false; + } + + // If this is a textarea, don't show the cursor if + // the escape key was pressed and nothing happened. + make_cursor_visible = false; + } + } + vk::PRIOR => { + let height = node_prev.inner.height(); + + // If the cursor was already on the first line, + // move it to the start of the buffer. + if tb.get_cursor_visual_pos().y == 0 { + content.preferred_column = 0; + } + + if modifiers == kbmod::SHIFT { + tb.selection_update_visual(Point { + x: content.preferred_column, + y: tb.get_cursor_visual_pos().y - height, + }); + } else { + tb.cursor_move_to_visual(Point { + x: content.preferred_column, + y: tb.get_cursor_visual_pos().y - height, + }); + } + + content.scroll_offset.y -= height; + } + vk::NEXT => { + let height = node_prev.inner.height(); + + // If the cursor was already on the last line, + // move it to the end of the buffer. 
+ if tb.get_cursor_visual_pos().y >= tb.get_visual_line_count() - 1 { + content.preferred_column = CoordType::MAX; + } + + if modifiers == kbmod::SHIFT { + tb.selection_update_visual(Point { + x: content.preferred_column, + y: tb.get_cursor_visual_pos().y + height, + }); + } else { + tb.cursor_move_to_visual(Point { + x: content.preferred_column, + y: tb.get_cursor_visual_pos().y + height, + }); + } + + if content.preferred_column == CoordType::MAX { + content.preferred_column = tb.get_cursor_visual_pos().x; + } + + content.scroll_offset.y += height; + } + vk::END => { + if modifiers == kbmod::SHIFT { + tb.selection_update_logical(Point { + x: CoordType::MAX, + y: tb.get_cursor_logical_pos().y, + }); + } else { + tb.cursor_move_to_logical(Point { + x: CoordType::MAX, + y: tb.get_cursor_logical_pos().y, + }); + } + } + vk::HOME => { + if modifiers == kbmod::SHIFT { + tb.selection_update_logical(Point { + x: 0, + y: tb.get_cursor_logical_pos().y, + }); + } else { + tb.cursor_move_to_logical(Point { + x: 0, + y: tb.get_cursor_logical_pos().y, + }); + } + } + vk::LEFT => { + let granularity = if modifiers == kbmod::CTRL { + CursorMovement::Word + } else { + CursorMovement::Grapheme + }; + if modifiers == kbmod::SHIFT { + tb.selection_update_delta(granularity, -1); + } else { + tb.cursor_move_delta(granularity, -1); + } + } + vk::UP => { + match modifiers { + kbmod::NONE => { + // If the cursor was already on the first line, + // move it to the start of the buffer. + if tb.get_cursor_visual_pos().y == 0 { + content.preferred_column = 0; + } + + tb.cursor_move_to_visual(Point { + x: content.preferred_column, + y: tb.get_cursor_visual_pos().y - 1, + }); + } + kbmod::CTRL => { + content.scroll_offset.y -= 1; + make_cursor_visible = false; + } + kbmod::SHIFT => { + // If the cursor was already on the first line, + // move it to the start of the buffer. 
+ if tb.get_cursor_visual_pos().y == 0 { + content.preferred_column = 0; + } + + tb.selection_update_visual(Point { + x: content.preferred_column, + y: tb.get_cursor_visual_pos().y - 1, + }); + } + kbmod::CTRL_ALT => { + // TODO: Add cursor above + } + _ => return false, + } + } + vk::RIGHT => { + let granularity = if modifiers == kbmod::CTRL { + CursorMovement::Word + } else { + CursorMovement::Grapheme + }; + if modifiers == kbmod::SHIFT { + tb.selection_update_delta(granularity, 1); + } else { + tb.cursor_move_delta(granularity, 1); + } + } + vk::DOWN => { + match modifiers { + kbmod::NONE => { + // If the cursor was already on the last line, + // move it to the end of the buffer. + if tb.get_cursor_visual_pos().y >= tb.get_visual_line_count() - 1 { + content.preferred_column = CoordType::MAX; + } + + tb.cursor_move_to_visual(Point { + x: content.preferred_column, + y: tb.get_cursor_visual_pos().y + 1, + }); + + if content.preferred_column == CoordType::MAX { + content.preferred_column = tb.get_cursor_visual_pos().x; + } + } + kbmod::CTRL => { + content.scroll_offset.y += 1; + make_cursor_visible = false; + } + kbmod::SHIFT => { + // If the cursor was already on the last line, + // move it to the end of the buffer. 
+ if tb.get_cursor_visual_pos().y >= tb.get_visual_line_count() - 1 { + content.preferred_column = CoordType::MAX; + } + + tb.selection_update_visual(Point { + x: content.preferred_column, + y: tb.get_cursor_visual_pos().y + 1, + }); + + if content.preferred_column == CoordType::MAX { + content.preferred_column = tb.get_cursor_visual_pos().x; + } + } + kbmod::CTRL_ALT => { + // TODO: Add cursor below + } + _ => return false, + } + } + vk::INSERT => { + tb.set_overtype(!tb.is_overtype()); + } + vk::DELETE => { + let granularity = if modifiers == kbmod::CTRL { + CursorMovement::Word + } else { + CursorMovement::Grapheme + }; + tb.delete(granularity, 1); + } + vk::A => { + match modifiers { + kbmod::CTRL => tb.select_all(), + _ => return false, + }; + } + vk::X => { + match modifiers { + kbmod::CTRL => self.tui.clipboard = tb.extract_selection(true), + _ => return false, + }; + } + vk::C => { + match modifiers { + kbmod::CTRL => self.tui.clipboard = tb.extract_selection(false), + _ => return false, + }; + } + vk::V => { + match modifiers { + kbmod::CTRL => tb.write(&self.tui.clipboard), + _ => return false, + }; + } + vk::Y => { + match modifiers { + kbmod::CTRL => tb.redo(), + _ => return false, + }; + } + vk::Z => { + match modifiers { + kbmod::CTRL => tb.undo(), + kbmod::ALT => tb.toggle_word_wrap(), + _ => return false, + }; + } + _ => return false, + } + + if !matches!(key, vk::PRIOR | vk::NEXT | vk::UP | vk::DOWN) { + content.preferred_column = tb.get_cursor_visual_pos().x; + } + + self.set_input_consumed(); + return make_cursor_visible; + } + + false + } + + fn textarea_make_cursor_visible(&self, content: &mut TextareaContent, node_prev: &Node) { + let tb = &mut *content.buffer; + let mut scroll_x = content.scroll_offset.x; + let mut scroll_y = content.scroll_offset.y; + + let text_width = tb.get_text_width(); + let cursor_x = tb.get_cursor_visual_pos().x; + scroll_x = scroll_x.min(cursor_x); + scroll_x = scroll_x.max(cursor_x - text_width + 1); + + let
viewport_height = node_prev.inner.height(); + let cursor_y = tb.get_cursor_visual_pos().y; + // Scroll up if the cursor is above the visible area. + scroll_y = scroll_y.min(cursor_y); + // Scroll down if the cursor is below the visible area. + scroll_y = scroll_y.max(cursor_y - viewport_height + 1); + + content.scroll_offset.x = scroll_x; + content.scroll_offset.y = scroll_y; + } + + fn textarea_adjust_scroll_offset(&self, content: &mut TextareaContent) { + let tb = &mut *content.buffer; + let mut scroll_x = content.scroll_offset.x; + let mut scroll_y = content.scroll_offset.y; + + if tb.is_word_wrap_enabled() { + scroll_x = 0; + } + + scroll_x = scroll_x.max(0); + scroll_y = scroll_y.clamp(0, tb.get_visual_line_count() - 1); + + content.scroll_offset.x = scroll_x; + content.scroll_offset.y = scroll_y; + } + + pub fn scrollarea_begin(&mut self, classname: &'static str, intrinsic_size: Size) { + self.block_begin(classname); + + let container = self.tree.last_node_mut(); + container.content = NodeContent::Scrollarea(Point::MIN); + + if intrinsic_size.width > 0 || intrinsic_size.height > 0 { + container.intrinsic_size.width = intrinsic_size.width.max(0); + container.intrinsic_size.height = intrinsic_size.height.max(0); + container.intrinsic_size_set = true; + } + + self.block_begin("content"); + self.inherit_focus(); + + // Ensure that attribute modifications apply to the outer container. 
+ self.tree.last_node = container; + } + + pub fn scrollarea_scroll_to(&mut self, pos: Point) { + let container = self.tree.last_node_mut(); + if let NodeContent::Scrollarea(scrollarea) = &mut container.content { + *scrollarea = pos; + } else { + debug_assert!(false); + } + } + + pub fn scrollarea_end(&mut self) { + self.block_end(); // content block + + let container = self.tree.current_node_mut(); + let NodeContent::Scrollarea(scroll_offset) = &mut container.content else { + panic!(); + }; + + if let Some(prev_container) = self.tui.get_prev_node(container.id) { + if *scroll_offset == Point::MIN { + if let NodeContent::Scrollarea(prev_offset) = &prev_container.content { + *scroll_offset = *prev_offset; + } else { + debug_assert!(false); + }; + } + + if !self.input_consumed && self.tui.mouse_state != InputMouseState::None { + let container_rect = prev_container.inner; + + if self.tui.mouse_state == InputMouseState::Scroll { + if container_rect.contains(self.tui.mouse_position) { + scroll_offset.x += self.input_scroll_delta.x; + scroll_offset.y += self.input_scroll_delta.y; + self.set_input_consumed(); + } + } else if let InputMouseGesture::Drag(delta) = self.input_mouse_gesture { + // We don't need to look up the previous track node, + // since it has a fixed size based on the container size. 
+ let track_rect = Rect { + left: container_rect.right, + top: container_rect.top, + right: container_rect.right + 1, + bottom: container_rect.bottom, + }; + if track_rect.contains(self.tui.mouse_down_position) { + let content = Tree::node_ref(prev_container.children.first).unwrap(); + let content_rect = content.inner; + let track_height = track_rect.height(); + let content_height = content_rect.height(); + + if content_height > track_height { + scroll_offset.y += + delta.y * (content_height + track_height) / track_height; + } + + self.set_input_consumed(); + } + } + } + } + + self.block_end(); // outer container + } + + pub fn list_begin(&mut self, classname: &'static str) { + self.block_begin(classname); + + let last_node = self.tree.last_node_mut(); + last_node.content = NodeContent::List; + } + + pub fn list_item(&mut self, overflow: Overflow, text: &str) -> bool { + let parent = self.tree.current_node_ref(); + self.next_block_id_mixin(parent.child_count as u64); + self.styled_label_begin("item", overflow); + self.attr_focusable(); + + let has_focus = self.is_focused(); + self.styled_label_add_text(if has_focus { "> " } else { " " }); + self.styled_label_add_text(text); + + self.styled_label_end(); + self.button_activated() + } + + pub fn list_end(&mut self) { + self.block_end(); + } + + pub fn menubar_begin(&mut self) { + self.table_begin("menubar"); + self.table_next_row(); + } + + pub fn menubar_menu_begin(&mut self, text: &str, accelerator: char) -> bool { + let row = self.tree.current_node_ref(); + + self.next_block_id_mixin(row.child_count as u64); + self.menubar_label(text, accelerator); + self.attr_focusable(); + self.attr_padding(Rect::two(0, 1)); + + if self.consume_shortcut(kbmod::ALT | InputKey::new(accelerator as u32)) { + self.steal_focus(); + } + + if self.contains_focus() { + if self.consume_shortcut(vk::ESCAPE) { + // TODO: This should reassign the previous focused path. 
+ self.needs_settling = true; + self.tui.focused_node_path.clear(); + self.tui.focused_node_path.push(ROOT_ID); + return false; + } + + self.attr_background_rgba(self.indexed(IndexedColor::White)); + + self.table_begin("flyout"); + self.attr_float(FloatSpec { + anchor: Anchor::Last, + gravity_x: 0.0, + gravity_y: 0.0, + offset_x: 0, + offset_y: 1, + }); + self.attr_border(); + self.attr_background_rgba(self.indexed(IndexedColor::White)); + return true; + } + + false + } + + pub fn menubar_menu_item(&mut self, text: &str, accelerator: char, shortcut: InputKey) -> bool { + self.table_next_row(); + self.attr_focusable(); + if self.is_focused() { + self.attr_background_rgba(self.indexed(IndexedColor::BrightBlue)); + } + + let clicked = + self.button_activated() || self.consume_shortcut(InputKey::new(accelerator as u32)); + + self.menubar_label(text, accelerator); + self.menubar_shortcut(shortcut); + + if clicked { + // TODO: This should reassign the previous focused path. + self.needs_settling = true; + self.tui.focused_node_path.clear(); + self.tui.focused_node_path.push(ROOT_ID); + } + + clicked + } + + pub fn menubar_menu_end(&mut self) { + self.table_end(); + } + + pub fn menubar_end(&mut self) { + let menu_row = self.tree.current_node_ref(); + self.table_end(); + + let focus_path = &self.tui.focused_node_path[..]; + if self.input_consumed + || focus_path.len() < 4 + || focus_path[focus_path.len() - 3] != menu_row.id + { + return; + } + let Some(input) = self.input_keyboard else { + return; + }; + if !matches!(input, vk::LEFT | vk::RIGHT | vk::UP | vk::DOWN) { + return; + } + + let container; + let element_id; + + if input == vk::LEFT || input == vk::RIGHT { + container = menu_row; + element_id = focus_path[focus_path.len() - 4]; + } else { + let flyout = unsafe { self.tree.root_last.as_ref().unwrap() }; + container = flyout; + element_id = if focus_path.len() == 6 && focus_path[focus_path.len() - 5] == flyout.id { + focus_path[focus_path.len() - 6] + } else { + 0 
+ }; + } + + // In an unnested menu like ours, going up/left and down/right respectively is the same. + // The only thing that changes is the layout direction, which we don't care about. + let focused_node = Tree::iterate_siblings(container.children.first) + .find(|node| node.id == element_id) + .and_then(|node| { + if input == vk::LEFT || input == vk::UP { + Tree::node_ref(node.siblings.prev) + } else { + Tree::node_ref(node.siblings.next) + } + }) + .or_else(|| { + if input == vk::LEFT || input == vk::UP { + Tree::node_ref(container.children.last) + } else { + Tree::node_ref(container.children.first) + } + }) + .unwrap(); + + Tui::build_node_path(Some(focused_node), &mut self.tui.focused_node_path); + self.needs_settling = true; + + self.set_input_consumed(); + } + + fn menubar_label(&mut self, text: &str, accelerator: char) { + if !accelerator.is_ascii_uppercase() { + self.label("label", Overflow::Clip, text); + return; + } + + let mut off = 0; + while off < text.len() { + let mut c = text.as_bytes()[off]; + c &= !0x20; // transform `c` to uppercase + if c == accelerator as u8 { + break; + } + off += 1; + } + + self.styled_label_begin("label", Overflow::Clip); + + if off < text.len() { + // Highlight the accelerator in red. + self.styled_label_add_text(&text[..off]); + self.styled_label_set_foreground_indexed(Some(IndexedColor::BrightRed)); + self.styled_label_add_text(&text[off..off + 1]); + self.styled_label_set_foreground_indexed(None); + self.styled_label_add_text(&text[off + 1..]); + } else { + // Add the accelerator in parentheses (still in red). 
+ let ch = accelerator as u8; + self.styled_label_add_text(text); + self.styled_label_add_text("("); + self.styled_label_set_foreground_indexed(Some(IndexedColor::BrightRed)); + self.styled_label_add_text(unsafe { helpers::str_from_raw_parts(&ch, 1) }); + self.styled_label_set_foreground_indexed(None); + self.styled_label_add_text(")"); + } + + self.styled_label_end(); + } + + fn menubar_shortcut(&mut self, shortcut: InputKey) { + let shortcut_letter = shortcut.value() as u8 as char; + if shortcut_letter.is_ascii_uppercase() { + let mut shortcut_text = String::new(); + if shortcut.modifiers_contains(kbmod::CTRL) { + shortcut_text.push_str("Ctrl+"); + } + if shortcut.modifiers_contains(kbmod::ALT) { + shortcut_text.push_str("Alt+"); + } + if shortcut.modifiers_contains(kbmod::SHIFT) { + shortcut_text.push_str("Shift+"); + } + shortcut_text.push(shortcut_letter); + + self.label("shortcut", Overflow::Clip, &shortcut_text); + self.attr_padding(Rect { + left: 2, + top: 0, + right: 0, + bottom: 0, + }); + } else { + self.block_begin("shortcut"); + self.block_end(); + } + } +} + +enum VisitControl { + Continue, + SkipChildren, + Stop, +} + +struct Tree { + tail: *const Node, + root_first: *const Node, + root_last: *const Node, + last_node: *const Node, + current_node: *const Node, + count: usize, + checksum: u64, +} + +impl Tree { + fn new() -> Self { + let mut tree = Self::default(); + tree.append_child(Node { + id: ROOT_ID, + classname: "root", + ..Default::default() + }); + tree.root_first = tree.tail; + tree.root_last = tree.tail; + tree.last_node = tree.tail; + tree.current_node = tree.tail; + tree + } + + fn append_child(&mut self, node: Node) { + let node = Box::leak(Box::new(node)); + + if let Some(parent) = Tree::node_mut(self.current_node) { + parent.append_child(node); + } + + node.prev = self.tail; + if let Some(tail) = Tree::node_mut(self.tail) { + tail.next = node; + } + self.tail = node; + + self.last_node = node; + self.current_node = node; + self.count += 
/// Owner of all UI nodes built during one frame.
///
/// Nodes are heap-allocated individually (see `append_child`) and linked in two ways:
/// through `Node::prev`/`Node::next` in creation order, and through the
/// parent/sibling/children pointers that form the actual tree.
struct Tree {
    /// The most recently allocated node, i.e. the end of the `prev`/`next` chain.
    tail: *const Node,
    /// First root node. Set to the "root" node by `new`.
    root_first: *const Node,
    /// Last root node. Set to the "root" node by `new`.
    root_last: *const Node,
    /// The node that was appended or popped most recently.
    last_node: *const Node,
    /// The node that newly appended nodes become children of.
    current_node: *const Node,
    /// Number of nodes appended so far.
    count: usize,
    /// Running hash over the IDs of all appended nodes, in append order.
    checksum: u64,
}

impl Tree {
    /// Creates a tree that contains only the root node (`ROOT_ID`, classname "root").
    fn new() -> Self {
        let mut tree = Self::default();
        tree.append_child(Node {
            id: ROOT_ID,
            classname: "root",
            ..Default::default()
        });
        tree.root_first = tree.tail;
        tree.root_last = tree.tail;
        tree.last_node = tree.tail;
        tree.current_node = tree.tail;
        tree
    }

    /// Allocates `node`, appends it as a child of the current node (if there is one),
    /// and makes it both the last and the current node.
    fn append_child(&mut self, node: Node) {
        // The allocation is leaked on purpose; `Drop for Tree` reclaims it later.
        let node = Box::leak(Box::new(node));

        if let Some(parent) = Tree::node_mut(self.current_node) {
            parent.append_child(node);
        }

        // Link the node into the creation-order chain.
        node.prev = self.tail;
        if let Some(tail) = Tree::node_mut(self.tail) {
            tail.next = node;
        }
        self.tail = node;

        self.last_node = node;
        self.current_node = node;
        self.count += 1;
        // wymix is weak, but both checksum and node.id are proper random, so... it's not *that* bad.
        self.checksum = wymix(self.checksum, node.id);
    }

    /// Leaves the current node: it becomes the last node, and its `stack_parent`
    /// becomes the new current node.
    fn pop_stack(&mut self) {
        let current_node = self.current_node_ref();
        self.last_node = current_node;
        self.current_node = current_node.stack_parent;
    }

    /// Returns the last node. The pointer must be non-null (checked in debug builds only).
    fn last_node_ref<'a>(&self) -> &'a Node {
        debug_assert!(!self.last_node.is_null());
        unsafe { &*(self.last_node as *const _) }
    }

    /// Mutable variant of `last_node_ref`. Nothing prevents aliasing; see `node_mut`.
    fn last_node_mut<'a>(&self) -> &'a mut Node {
        debug_assert!(!self.last_node.is_null());
        unsafe { &mut *(self.last_node as *mut _) }
    }

    /// Returns the current node. The pointer must be non-null (checked in debug builds only).
    fn current_node_ref<'a>(&self) -> &'a Node {
        debug_assert!(!self.current_node.is_null());
        unsafe { &*(self.current_node as *const _) }
    }

    /// Mutable variant of `current_node_ref`. Nothing prevents aliasing; see `node_mut`.
    fn current_node_mut<'a>(&self) -> &'a mut Node {
        debug_assert!(!self.current_node.is_null());
        unsafe { &mut *(self.current_node as *mut _) }
    }

    /// Turns a raw node pointer into a shared reference, or `None` if it is null.
    /// The returned lifetime is unbounded and chosen by the caller.
    fn node_ref<'a>(node: *const Node) -> Option<&'a Node> {
        unsafe { node.as_ref() }
    }

    /// Turns a raw node pointer into a mutable reference, or `None` if it is null.
    ///
    /// This (and `node_ref`) is unsound: the `*const` pointer is cast to `*mut` and the
    /// resulting reference has an unbounded lifetime, so the compiler cannot rule out
    /// aliasing. According to the original author this was a deliberate shortcut taken
    /// under time pressure after a `RefCell`-based design proved unworkable.
    fn node_mut<'a>(node: *const Node) -> Option<&'a mut Node> {
        unsafe { (node as *mut Node).as_mut() }
    }

    /// Iterates over `node` and all siblings that follow it (via `siblings.next`),
    /// yielding mutable references. A null `node` yields an empty iterator.
    fn iterate_siblings<'a>(mut node: *const Node) -> impl Iterator<Item = &'a mut Node> {
        iter::from_fn(move || {
            if node.is_null() {
                None
            } else {
                let n = unsafe { &mut *(node as *mut Node) };
                // Read the successor before handing out the reference.
                node = n.siblings.next;
                Some(n)
            }
        })
    }

    /// Iterates over all root nodes, starting at `root_first`.
    fn iterate_roots<'a>(&self) -> impl Iterator<Item = &'a mut Node> + use<'a> {
        Self::iterate_siblings(self.root_first)
    }

    /// Visits all nodes under and including `root` in depth order.
    /// Starts with node `start`.
    ///
    /// `depth` is the depth value passed to `cb` for `start`; it is incremented when
    /// descending into children and decremented when returning to a parent.
    /// If `forward` is false, the traversal uses last-child/previous-sibling links instead
    /// of first-child/next-sibling links. The return value of `cb` controls whether the
    /// children of the visited node are entered, skipped, or the traversal stops entirely.
    ///
    /// WARNING: Breaks in hilarious ways if `start` is not within `root`.
    #[inline]
    fn visit_all<T: FnMut(usize, &Node) -> VisitControl>(
        root: *const Node,
        start: *const Node,
        mut depth: usize,
        forward: bool,
        mut cb: T,
    ) {
        if root.is_null() || start.is_null() {
            return;
        }

        let mut node = unsafe { &*start };
        let children_idx = if forward {
            NodeChildren::FIRST
        } else {
            NodeChildren::LAST
        };
        let siblings_idx = if forward {
            NodeSiblings::NEXT
        } else {
            NodeSiblings::PREV
        };

        // This is a do-while loop: the block is the body and its final expression the condition.
        while {
            'traverse: {
                match cb(depth, node) {
                    VisitControl::Continue => {
                        // Depth first search: It has a child? Go there.
                        if !node.children.get(children_idx).is_null() {
                            node = unsafe { &*node.children.get(children_idx) };
                            depth += 1;
                            break 'traverse;
                        }
                    }
                    VisitControl::SkipChildren => {}
                    VisitControl::Stop => return,
                }

                // Out of children? Go back to the parent.
                while node.siblings.get(siblings_idx).is_null() && !ptr::eq(node, root) {
                    node = unsafe { &*node.parent };
                    depth -= 1;
                }

                // If `start != root`, this ensures we restart the traversal at `root` until we hit `start` again.
                // Otherwise, this will continue above, hit the if condition, and break out of the loop.
                if ptr::eq(node, root) {
                    break 'traverse;
                }

                // Go to the parent's next sibling. --> Next subtree.
                node = unsafe { &*node.siblings.get(siblings_idx) };
            }

            // We're done once we wrapped around to the `start`.
            !ptr::eq(node, start)
        } {}
    }
}

impl Drop for Tree {
    /// Frees every node reachable from `root_first` through the `next` chain.
    /// These are the allocations leaked by `Tree::append_child`.
    fn drop(&mut self) {
        let mut node = self.root_first;
        while !node.is_null() {
            // Fetch the successor first; the node is freed right after.
            let next = unsafe { (*node).next };
            unsafe {
                let _ = Box::from_raw(node as *mut Node);
            }
            node = next;
        }
    }
}

impl Default for Tree {
    /// Returns an empty tree: all pointers null, no nodes, checksum 0.
    fn default() -> Self {
        Self {
            tail: null(),
            root_first: null(),
            root_last: null(),
            last_node: null(),
            current_node: null(),
            count: 0,
            checksum: 0,
        }
    }
}
+ offset: Point, +} + +#[derive(Default)] +pub enum Alignment { + #[default] + Left, + Center, + Right, +} + +#[derive(Default)] +struct Attributes { + float: Option, + align: Alignment, + padding: Rect, + bg: u32, + fg: u32, + bordered: bool, + focusable: bool, + focus_brackets: bool, +} + +struct TableContent { + columns: Vec, + cell_gap: Size, +} + +struct StyledTextChunk { + text: String, + fg: u32, +} + +impl Document for Vec { + fn read_backward(&self, mut off: usize) -> &[u8] { + for chunk in self.iter().rev() { + if off < chunk.text.len() { + return &chunk.text.as_bytes()[chunk.text.len() - off..]; + } + off -= chunk.text.len(); + } + &[] + } + + fn read_forward(&self, mut off: usize) -> &[u8] { + for chunk in self.iter() { + if off < chunk.text.len() { + return &chunk.text.as_bytes()[off..]; + } + off -= chunk.text.len(); + } + &[] + } +} + +struct TextContent { + chunks: Vec, + overflow: Overflow, +} + +struct TextareaContent { + buffer: RcTextBuffer, + scroll_offset: Point, + preferred_column: CoordType, + // TODO: Double click detection should be in `Tui`. 
+ last_click: std::time::Instant, + single_line: bool, + has_focus: bool, +} + +#[derive(Default)] +enum NodeContent { + #[default] + None, + List, + Modal(String), // title + Table(TableContent), + Text(TextContent), + Textarea(TextareaContent), + Scrollarea(Point), // scroll offset +} + +struct ScrollareaThumb { + top: CoordType, + bottom: CoordType, +} + +struct NodeSiblings { + prev: *const Node, + next: *const Node, +} + +impl NodeSiblings { + const PREV: usize = 0; + const NEXT: usize = 1; + + fn get(&self, off: usize) -> *const Node { + match off & 1 { + 0 => self.prev, + 1 => self.next, + _ => unreachable!(), + } + } +} + +struct NodeChildren { + first: *const Node, + last: *const Node, +} + +impl NodeChildren { + const FIRST: usize = 0; + const LAST: usize = 1; + + fn get(&self, off: usize) -> *const Node { + match off & 1 { + 0 => self.first, + 1 => self.last, + _ => unreachable!(), + } + } +} + +pub struct Node { + prev: *const Node, + next: *const Node, + stack_parent: *const Node, + + id: u64, + classname: &'static str, + parent: *const Node, + siblings: NodeSiblings, + children: NodeChildren, + child_count: usize, + + attributes: Attributes, + content: NodeContent, + + intrinsic_size: Size, + intrinsic_size_set: bool, + outer: Rect, // in screen-space, calculated during layout + inner: Rect, // in screen-space, calculated during layout + outer_clipped: Rect, // in screen-space, calculated during layout, restricted to the viewport + inner_clipped: Rect, // in screen-space, calculated during layout, restricted to the viewport +} + +impl Default for Node { + fn default() -> Self { + Node { + prev: null(), + next: null(), + stack_parent: null(), + + id: 0, + classname: "", + parent: null(), + siblings: NodeSiblings { + prev: null(), + next: null(), + }, + children: NodeChildren { + first: null(), + last: null(), + }, + child_count: 0, + + attributes: Default::default(), + content: Default::default(), + + intrinsic_size: Default::default(), + 
impl Node {
    /// Shrinks `outer` to the node's content rectangle by removing padding and decorations.
    ///
    /// A border takes one cell on every side, focus brackets take one cell on the left
    /// and right, and scrollarea content reserves one cell on the right.
    fn outer_to_inner(&self, mut outer: Rect) -> Rect {
        let l = self.attributes.bordered || self.attributes.focus_brackets;
        let t = self.attributes.bordered;
        let r = self.attributes.bordered
            || self.attributes.focus_brackets
            || matches!(self.content, NodeContent::Scrollarea(..));
        let b = self.attributes.bordered;

        outer.left += self.attributes.padding.left + l as CoordType;
        outer.top += self.attributes.padding.top + t as CoordType;
        outer.right -= self.attributes.padding.right + r as CoordType;
        outer.bottom -= self.attributes.padding.bottom + b as CoordType;
        outer
    }

    /// Returns the node's intrinsic size grown by padding and decorations.
    /// This is the inverse of `outer_to_inner`, expressed as a size.
    fn intrinsic_to_outer(&self) -> Size {
        let l = self.attributes.bordered || self.attributes.focus_brackets;
        let t = self.attributes.bordered;
        let r = self.attributes.bordered
            || self.attributes.focus_brackets
            || matches!(self.content, NodeContent::Scrollarea(..));
        let b = self.attributes.bordered;

        let mut size = self.intrinsic_size;
        size.width += self.attributes.padding.left
            + self.attributes.padding.right
            + l as CoordType
            + r as CoordType;
        size.height += self.attributes.padding.top
            + self.attributes.padding.bottom
            + t as CoordType
            + b as CoordType;
        size
    }

    /// Recursively computes `intrinsic_size` bottom-up.
    ///
    /// Tables size their columns to the widest cell and always overwrite their own size.
    /// Every other node stacks its children vertically (max width, summed height), but
    /// only if its intrinsic size has not been set already.
    fn compute_intrinsic_size(&mut self) {
        match &mut self.content {
            NodeContent::Table(spec) => {
                // Calculate each row's height and the maximum width of each of its columns.
                for row in Tree::iterate_siblings(self.children.first) {
                    let mut row_height = 0;

                    for (column, cell) in Tree::iterate_siblings(row.children.first).enumerate() {
                        cell.compute_intrinsic_size();

                        let size = cell.intrinsic_to_outer();

                        // If the spec.columns[] value is positive, it's an absolute width.
                        // Otherwise, it's a fraction of the remaining space.
                        //
                        // TODO: The latter is computed incorrectly.
                        // Example: If the items are "a","b","c" then the intrinsic widths are [1,1,1].
                        // If the column spec is [0,-3,-1], then this code assigns an intrinsic row
                        // width of 3, but it should be 5 (1+1+3), because the spec says that the
                        // last column (flexible 1/1) must be 3 times as wide as the 2nd one (1/3rd).
                        // It's not a big deal yet, because such functionality isn't needed just yet.
                        if column >= spec.columns.len() {
                            spec.columns.push(0);
                        }
                        spec.columns[column] = spec.columns[column].max(size.width);

                        row_height = row_height.max(size.height);
                    }

                    row.intrinsic_size.height = row_height;
                }

                // Assuming each column has the width of the widest cell in that column,
                // calculate the total width of the table.
                let total_gap_width =
                    spec.cell_gap.width * spec.columns.len().saturating_sub(1) as CoordType;
                let total_inner_width = spec.columns.iter().sum::<CoordType>() + total_gap_width;
                let mut total_width = 0;
                let mut total_height = 0;

                // Assign the total width to each row.
                for row in Tree::iterate_siblings(self.children.first) {
                    row.intrinsic_size.width = total_inner_width;
                    row.intrinsic_size_set = true;

                    let size = row.intrinsic_to_outer();
                    total_width = total_width.max(size.width);
                    total_height += size.height;
                }

                let total_gap_height =
                    spec.cell_gap.height * self.child_count.saturating_sub(1) as CoordType;
                total_height += total_gap_height;

                // Assign the total width/height to the table.
                self.intrinsic_size.width = total_width;
                self.intrinsic_size.height = total_height;
                self.intrinsic_size_set = true;
            }
            _ => {
                let mut max_width = 0;
                let mut total_height = 0;

                for child in Tree::iterate_siblings(self.children.first) {
                    child.compute_intrinsic_size();

                    let size = child.intrinsic_to_outer();
                    max_width = max_width.max(size.width);
                    total_height += size.height;
                }

                // An explicitly assigned intrinsic size wins over the computed one.
                if !self.intrinsic_size_set {
                    self.intrinsic_size.width = max_width;
                    self.intrinsic_size.height = total_height;
                    self.intrinsic_size_set = true;
                }

                // NOTE: A disabled, grid-column based variant of this computation used to be
                // kept here as commented-out code. It referred to a `grid_columns` attribute
                // that `Attributes` does not have.
            }
        }
    }

    /// Assigns screen-space rectangles to all descendants, top-down.
    ///
    /// Expects `self.inner` (and for scrollareas `self.inner_clipped`) to be set already.
    /// `clip` is the rectangle the `*_clipped` rectangles of the children are restricted to.
    fn layout_children(&mut self, clip: Rect) {
        if self.children.first.is_null() || self.inner.is_empty() {
            return;
        }

        match &mut self.content {
            NodeContent::Table(spec) => {
                let width = self.inner.right - self.inner.left;
                let mut x = self.inner.left;
                let mut y = self.inner.top;

                for row in Tree::iterate_siblings(self.children.first) {
                    // Rows always span the full inner width of the table.
                    let mut size = row.intrinsic_to_outer();
                    size.width = width;
                    row.outer.left = x;
                    row.outer.top = y;
                    row.outer.right = x + size.width;
                    row.outer.bottom = y + size.height;
                    row.outer = row.outer.intersect(self.inner);
                    row.inner = row.outer_to_inner(row.outer);
                    row.outer_clipped = row.outer.intersect(clip);
                    row.inner_clipped = row.inner.intersect(clip);

                    let mut row_height = 0;

                    for (column, cell) in Tree::iterate_siblings(row.children.first).enumerate() {
                        // Cells use the column width computed by `compute_intrinsic_size`.
                        let mut size = cell.intrinsic_to_outer();
                        size.width = spec.columns[column];
                        cell.outer.left = x;
                        cell.outer.top = y;
                        cell.outer.right = x + size.width;
                        cell.outer.bottom = y + size.height;
                        cell.outer = cell.outer.intersect(self.inner);
                        cell.inner = cell.outer_to_inner(cell.outer);
                        cell.outer_clipped = cell.outer.intersect(clip);
                        cell.inner_clipped = cell.inner.intersect(clip);

                        x += size.width + spec.cell_gap.width;
                        row_height = row_height.max(size.height);

                        cell.layout_children(clip);
                    }

                    // Next row: back to the left edge, below the tallest cell.
                    x = self.inner.left;
                    y += row_height + spec.cell_gap.height;
                }
            }
            NodeContent::Scrollarea(pos) => {
                // Only the first child of a scrollarea is laid out as its content.
                let Some(content) = Tree::node_mut(self.children.first) else {
                    unreachable!();
                };

                // content available viewport size (-1 for the track)
                let sx = self.inner.right - self.inner.left;
                let sy = self.inner.bottom - self.inner.top;
                // actual content size
                let cx = sx;
                let cy = content.intrinsic_size.height.max(sy);
                // scroll offset
                let ox = 0;
                let oy = pos.y.clamp(0, cy - sy);

                // Write the clamped offset back so that the stored scroll position stays valid.
                pos.x = ox;
                pos.y = oy;

                content.outer.left = self.inner.left - ox;
                content.outer.top = self.inner.top - oy;
                content.outer.right = content.outer.left + cx;
                content.outer.bottom = content.outer.top + cy;
                content.inner = content.outer_to_inner(content.outer);
                content.outer_clipped = content.outer.intersect(self.inner_clipped);
                content.inner_clipped = content.inner.intersect(self.inner_clipped);

                content.layout_children(content.inner_clipped);
            }
            _ => {
                let width = self.inner.right - self.inner.left;
                let x = self.inner.left;
                let mut y = self.inner.top;

                // Children are stacked vertically; `align` only shifts the left edge.
                for child in Tree::iterate_siblings(self.children.first) {
                    let mut size = child.intrinsic_to_outer();
                    let remaining = width - size.width;
                    size.width = width;
                    child.outer.left = match child.attributes.align {
                        Alignment::Left => x,
                        Alignment::Center => x + remaining / 2,
                        Alignment::Right => x + remaining,
                    };
                    child.outer.top = y;
                    child.outer.right = x + size.width;
                    child.outer.bottom = y + size.height;
                    child.outer = child.outer.intersect(self.inner);
                    child.inner = child.outer_to_inner(child.outer);
                    child.outer_clipped = child.outer.intersect(clip);
                    child.inner_clipped = child.inner.intersect(clip);
                    y += size.height;
                }

                for child in Tree::iterate_siblings(self.children.first) {
                    child.layout_children(clip);
                }

                // NOTE: A disabled, grid-column based variant of this layout used to be kept
                // here as commented-out code. It referred to a `grid_columns` attribute that
                // `Attributes` does not have.
            }
        }
    }

    /// Appends `child` as the last child of this node.
    ///
    /// # Panics
    ///
    /// Panics if `child` already has a previous or next sibling.
    fn append_child(&mut self, child: &mut Self) {
        // The child node is supposed to not be part of any tree.
        assert!(child.siblings.prev.is_null() && child.siblings.next.is_null());

        child.parent = self;
        child.siblings.prev = self.children.last;

        if let Some(child_last) = Tree::node_mut(self.children.last) {
            child_last.siblings.next = child;
        }
        if self.children.first.is_null() {
            self.children.first = child;
        }
        self.children.last = child;
        self.child_count += 1;
    }

    /// Unlinks this node from its parent and siblings. Does nothing if it has no parent.
    /// The node's own children are left untouched.
    fn remove_from_parent(&mut self) {
        let Some(parent) = Tree::node_mut(self.parent) else {
            return;
        };

        if let Some(sibling_prev) = Tree::node_mut(self.siblings.prev) {
            sibling_prev.siblings.next = self.siblings.next;
        }
        if let Some(sibling_next) = Tree::node_mut(self.siblings.next) {
            sibling_next.siblings.prev = self.siblings.prev;
        }
        if ptr::eq(parent.children.first, self) {
            parent.children.first = self.siblings.next;
        }
        if ptr::eq(parent.children.last, self) {
            parent.children.last = self.siblings.prev;
        }
        parent.child_count -= 1;

        self.parent = null();
        self.siblings.prev = null();
        self.siblings.next = null();
    }
}
/// The most recent position at which `measure_forward` may break a line when word wrap is on.
pub struct WrapOpportunity {
    /// Absolute document offset right after the cluster that allows the break.
    absolute_offset: usize,
    /// Offset of the chunk iterator at that point; used to rewind it with `seek`.
    offset_next_cluster: usize,
    /// Cluster properties of the character following the break.
    props_next_cluster: usize,
    /// Logical column at the break.
    logical_pos_x: CoordType,
}

/// Measures text: converts between byte offsets, logical positions and visual positions.
///
/// The configuration keeps a cursor, so consecutive `goto_*` calls continue from the
/// position reached by the previous call.
pub struct MeasurementConfig<'doc> {
    buffer: &'doc dyn Document,
    tab_size: CoordType,
    /// `CoordType::MAX` means that word wrap is disabled.
    word_wrap_column: CoordType,
    cursor: UcdCursor,
}

impl<'doc> MeasurementConfig<'doc> {
    /// Creates a configuration with a tab size of 8, word wrap disabled,
    /// and the cursor at the default (zero) position.
    pub fn new(buffer: &'doc dyn Document) -> Self {
        Self {
            buffer,
            tab_size: 8,
            word_wrap_column: CoordType::MAX,
            cursor: UcdCursor::default(),
        }
    }

    /// Sets the tab size in columns.
    pub fn with_tab_size(mut self, tab_size: CoordType) -> Self {
        self.tab_size = tab_size;
        self
    }

    /// Sets the column at which lines wrap. `CoordType::MAX` disables wrapping.
    pub fn with_word_wrap_column(mut self, word_wrap_column: CoordType) -> Self {
        self.word_wrap_column = word_wrap_column;
        self
    }

    /// Sets the cursor from which the next `goto_*` call starts measuring.
    pub fn with_cursor(mut self, cursor: UcdCursor) -> Self {
        self.cursor = cursor;
        self
    }

    /// Moves the cursor forward until it reaches the byte offset `offset`.
    pub fn goto_offset(&mut self, offset: usize) -> UcdCursor {
        self.cursor = Self::measure_forward(
            self.tab_size,
            self.word_wrap_column,
            offset,
            Point::MAX,
            Point::MAX,
            self.cursor,
            self.buffer,
        );
        self.cursor
    }

    /// Moves the cursor forward until it reaches the logical position `logical_target`.
    pub fn goto_logical(&mut self, logical_target: Point) -> UcdCursor {
        self.cursor = Self::measure_forward(
            self.tab_size,
            self.word_wrap_column,
            usize::MAX,
            logical_target,
            Point::MAX,
            self.cursor,
            self.buffer,
        );
        self.cursor
    }

    /// Moves the cursor forward until it reaches the visual position `visual_target`.
    pub fn goto_visual(&mut self, visual_target: Point) -> UcdCursor {
        self.cursor = Self::measure_forward(
            self.tab_size,
            self.word_wrap_column,
            usize::MAX,
            Point::MAX,
            visual_target,
            self.cursor,
            self.buffer,
        );
        self.cursor
    }

    /// Returns the current cursor.
    pub fn cursor(&self) -> UcdCursor {
        self.cursor
    }

    /// Advances `cursor` one grapheme cluster at a time until any of the three targets
    /// (byte offset, logical position, visual position) is reached, or the document ends.
    ///
    /// Targets that should be ignored are passed as `usize::MAX` / `Point::MAX`.
    /// This function only ever moves forward: if `cursor` is already at or past the
    /// logical or visual target, it is returned unchanged.
    fn measure_forward(
        tab_size: CoordType,
        word_wrap_column: CoordType,
        offset_target: usize,
        logical_target: Point,
        visual_target: Point,
        cursor: UcdCursor,
        buffer: &dyn Document,
    ) -> UcdCursor {
        if cursor.logical_pos >= logical_target || cursor.visual_pos >= visual_target {
            return cursor;
        }

        // `wrap` is the last position at which the current line may be broken.
        // `hit` is a target position that was reached while a wrap opportunity was pending.
        // It's only tentative, because a later word wrap may still move it to the next line.
        let mut wrap: Option<WrapOpportunity> = None;
        let mut hit: Option<UcdCursor> = None;
        let mut absolute_offset = cursor.offset;
        let mut logical_pos_x = cursor.logical_pos.x;
        let mut logical_pos_y = cursor.logical_pos.y;
        let mut visual_pos_x = cursor.visual_pos.x;
        let mut visual_pos_y = cursor.visual_pos.y;
        let mut column = cursor.column;
        // The `*_target_x` values are the targets that apply to the current line.
        let (mut offset_target_x, mut logical_target_x, mut visual_target_x) = Self::recalc_target(
            offset_target,
            logical_target,
            visual_target,
            logical_pos_y,
            visual_pos_y,
        );

        // The outer loop fetches chunks from the document, the inner one walks the clusters in a chunk.
        'outer: loop {
            let chunk = buffer.read_forward(absolute_offset);
            let chunk_beg = absolute_offset;
            let chunk_end = absolute_offset + chunk.len();
            let mut it = Utf8Chars::new(chunk, 0);
            let Some(mut ch) = it.next() else {
                // An empty chunk means we've reached the end of the document.
                break;
            };

            let mut props_next_cluster = ucd_grapheme_cluster_lookup(ch);

            loop {
                if absolute_offset >= chunk_end {
                    break;
                }
                if absolute_offset >= offset_target_x
                    || logical_pos_x >= logical_target_x
                    || visual_pos_x >= visual_target_x
                {
                    // Without a pending wrap opportunity the position is final.
                    if wrap.is_none() {
                        break 'outer;
                    }

                    hit = Some(UcdCursor {
                        offset: absolute_offset,
                        logical_pos: Point {
                            x: logical_pos_x,
                            y: logical_pos_y,
                        },
                        visual_pos: Point {
                            x: visual_pos_x,
                            y: visual_pos_y,
                        },
                        column,
                    });
                    // Prevent hits on the same line until we encounter a line wrap or explicit newline.
                    offset_target_x = usize::MAX;
                    logical_target_x = CoordType::MAX;
                    visual_target_x = CoordType::MAX;
                }

                let props_current_cluster = props_next_cluster;
                let is_tab = ch == '\t';
                let mut offset_next_cluster;
                let mut width = 0;
                let mut state = 0;

                // Figure out the length and width of the rest of the grapheme cluster.
                loop {
                    offset_next_cluster = it.offset();
                    width += ucd_grapheme_cluster_character_width(props_next_cluster) as CoordType;

                    let Some(ch_next) = it.next() else {
                        break;
                    };

                    ch = ch_next;
                    let props_trail = ucd_grapheme_cluster_lookup(ch);
                    state = ucd_grapheme_cluster_joins(state, props_next_cluster, props_trail);
                    props_next_cluster = props_trail;

                    if ucd_grapheme_cluster_joins_done(state) {
                        break;
                    }
                }

                // Convert the chunk-relative offset into an absolute one.
                let offset_next_cluster = chunk_beg + offset_next_cluster;

                if is_tab {
                    // Tabs require special handling because they can have a variable width.
                    width = tab_size - (column % tab_size);
                } else {
                    // A cluster never occupies more than 2 columns.
                    width = width.min(2);
                }

                // Hard wrap: Both the logical and visual position advance by one line.
                if ucd_grapheme_cluster_is_newline(props_current_cluster) {
                    // Don't cross the newline if the target is on this line.
                    // E.g. if the callers asks for column 100 on a 10 column line,
                    // we'll return with the cursor set to column 10.
                    if logical_pos_y >= logical_target.y || visual_pos_y >= visual_target.y {
                        break 'outer;
                    }

                    logical_pos_x = 0;
                    logical_pos_y += 1;
                    visual_pos_x = 0;
                    visual_pos_y += 1;
                    column = 0;
                    // We moved the logical/visual pos past the newline,
                    // so we also need to move the offset past it.
                    absolute_offset = offset_next_cluster;
                    (offset_target_x, logical_target_x, visual_target_x) = Self::recalc_target(
                        offset_target,
                        logical_target,
                        visual_target,
                        logical_pos_y,
                        visual_pos_y,
                    );
                    continue;
                }

                // Line/word-wrap handling.
                if word_wrap_column != CoordType::MAX && visual_pos_x + width > word_wrap_column {
                    // Reset to the last break opportunity, if there was any.
                    if let Some(ref w) = wrap {
                        absolute_offset = w.absolute_offset;
                        it.seek(w.offset_next_cluster);
                        props_next_cluster = w.props_next_cluster;
                        logical_pos_x = w.logical_pos_x;
                    }

                    // Wrap!
                    visual_pos_x = 0;
                    visual_pos_y += 1;
                    (offset_target_x, logical_target_x, visual_target_x) = Self::recalc_target(
                        offset_target,
                        logical_target,
                        visual_target,
                        logical_pos_y,
                        visual_pos_y,
                    );
                    // The tentative hit was on the line that just got wrapped, so it's void now.
                    wrap = None;
                    hit = None;

                    if absolute_offset < chunk_beg {
                        // We've had to reset to a point before this chunk,
                        // so we have to re-read the previous contents.
                        break;
                    }

                    continue;
                }

                // Avoid advancing past the visual target, because `width` can be greater than 1.
                if visual_pos_x + width > visual_target_x {
                    if word_wrap_column == CoordType::MAX || wrap.is_none() {
                        break 'outer;
                    }

                    hit = Some(UcdCursor {
                        offset: absolute_offset,
                        logical_pos: Point {
                            x: logical_pos_x,
                            y: logical_pos_y,
                        },
                        visual_pos: Point {
                            x: visual_pos_x,
                            y: visual_pos_y,
                        },
                        column,
                    });
                    // Prevent hits on the same line until we encounter a line wrap or explicit newline.
                    offset_target_x = usize::MAX;
                    logical_target_x = CoordType::MAX;
                    visual_target_x = CoordType::MAX;
                }

                // Step over the cluster.
                absolute_offset = offset_next_cluster;
                logical_pos_x += 1;
                visual_pos_x += width;
                column += width;

                if word_wrap_column != CoordType::MAX
                    && !ucd_line_break_joins(props_current_cluster, props_next_cluster)
                {
                    // A new break opportunity confirms a tentative hit:
                    // the line can't be wrapped before the hit position anymore.
                    if hit.is_some() {
                        break 'outer;
                    }
                    wrap = Some(WrapOpportunity {
                        absolute_offset,
                        offset_next_cluster: it.offset(),
                        props_next_cluster,
                        logical_pos_x,
                    });
                }
            }
        }

        // A position exactly at the wrap column belongs to the start of the next visual line.
        if visual_pos_x >= word_wrap_column {
            visual_pos_x = 0;
            visual_pos_y += 1;
        }

        if let Some(c) = hit {
            return c;
        }

        UcdCursor {
            offset: absolute_offset,
            logical_pos: Point {
                x: logical_pos_x,
                y: logical_pos_y,
            },
            visual_pos: Point {
                x: visual_pos_x,
                y: visual_pos_y,
            },
            column,
        }
    }

    /// Returns the targets that apply to the current line as
    /// `(offset target, logical x target, visual x target)`.
    /// The offset target doesn't depend on the line and is passed through as is.
    #[inline]
    fn recalc_target(
        offset_target: usize,
        logical_target: Point,
        visual_target: Point,
        logical_pos_y: CoordType,
        visual_pos_y: CoordType,
    ) -> (usize, CoordType, CoordType) {
        (
            offset_target,
            Self::target_column(logical_target, logical_pos_y),
            Self::target_column(visual_target, visual_pos_y),
        )
    }

    /// Returns the x coordinate to stop at on line `y`: unreachable (`CoordType::MAX`)
    /// before the target line, `target.x` on it, and 0 (stop at once) past it.
    #[inline]
    fn target_column(target: Point, y: CoordType) -> CoordType {
        match y.cmp(&target.y) {
            Ordering::Less => CoordType::MAX,
            Ordering::Equal => target.x,
            Ordering::Greater => 0,
        }
    }
}
usize] = CharClass::Separator; + i += 1; + } + + classifier +} + +const WORD_CLASSIFIER: [CharClass; 256] = + construct_classifier(br#"`~!@#$%^&*()-=+[{]}\|;:'",.<>/?"#); + +/// Finds the next word boundary given a document cursor offset. +/// Returns the offset of the next word boundary. +pub fn word_forward(doc: &dyn Document, offset: usize) -> usize { + word_navigation(WordForward { + doc, + offset, + chunk: &[], + chunk_off: 0, + }) +} + +/// The backward version of `word_forward`. +pub fn word_backward(doc: &dyn Document, offset: usize) -> usize { + word_navigation(WordBackward { + doc, + offset, + chunk: &[], + chunk_off: 0, + }) +} + +/// Word navigation implementation. Matches the behavior of VS Code. +fn word_navigation(mut nav: T) -> usize { + // First skip one newline, if any. + nav.skip_newline(); + + // Skip any whitespace. + nav.skip_class(CharClass::Whitespace); + + // Skip one word or seperator and take note of the class. + let class = nav.peek(CharClass::Whitespace); + if matches!(class, CharClass::Separator | CharClass::Word) { + nav.next(); + + let off = nav.offset(); + + // Continue skipping the same class. + nav.skip_class(class); + + // If the class was a separator and we only moved one character, + // continue skipping characters of the word class. + if off == nav.offset() && class == CharClass::Separator { + nav.skip_class(CharClass::Word); + } + } + + nav.offset() +} + +trait WordNavigation { + fn skip_newline(&mut self); + fn skip_class(&mut self, class: CharClass); + fn peek(&self, default: CharClass) -> CharClass; + fn next(&mut self); + fn offset(&self) -> usize; +} + +struct WordForward<'a> { + doc: &'a dyn Document, + offset: usize, + chunk: &'a [u8], + chunk_off: usize, +} + +impl WordNavigation for WordForward<'_> { + fn skip_newline(&mut self) { + // We can rely on the fact that the document does not split graphemes across chunks. + // = If there's a newline it's wholly contained in this chunk. 
+ if self.chunk_off < self.chunk.len() && self.chunk[self.chunk_off] == b'\r' { + self.chunk_off += 1; + } + if self.chunk_off < self.chunk.len() && self.chunk[self.chunk_off] == b'\n' { + self.chunk_off += 1; + } + } + + fn skip_class(&mut self, class: CharClass) { + 'outer: loop { + while self.chunk_off < self.chunk.len() { + if WORD_CLASSIFIER[self.chunk[self.chunk_off] as usize] != class { + break 'outer; + } + self.chunk_off += 1; + } + + self.offset += self.chunk.len(); + self.chunk = self.doc.read_forward(self.offset); + self.chunk_off = 0; + } + } + + fn peek(&self, default: CharClass) -> CharClass { + if self.chunk_off < self.chunk.len() { + WORD_CLASSIFIER[self.chunk[self.chunk_off] as usize] + } else { + default + } + } + + fn next(&mut self) { + self.chunk_off += 1; + } + + fn offset(&self) -> usize { + self.offset + self.chunk_off + } +} + +struct WordBackward<'a> { + doc: &'a dyn Document, + offset: usize, + chunk: &'a [u8], + chunk_off: usize, +} + +impl WordNavigation for WordBackward<'_> { + fn skip_newline(&mut self) { + // We can rely on the fact that the document does not split graphemes across chunks. + // = If there's a newline it's wholly contained in this chunk. 
+ if self.chunk_off > 0 && self.chunk[self.chunk_off - 1] == b'\r' { + self.chunk_off -= 1; + } + if self.chunk_off > 0 && self.chunk[self.chunk_off - 1] == b'\n' { + self.chunk_off -= 1; + } + } + + fn skip_class(&mut self, class: CharClass) { + 'outer: loop { + while self.chunk_off > 0 { + if WORD_CLASSIFIER[self.chunk[self.chunk_off - 1] as usize] != class { + break 'outer; + } + self.chunk_off -= 1; + } + + self.offset -= self.chunk.len(); + self.chunk = self.doc.read_backward(self.offset); + self.chunk_off = self.chunk.len(); + } + } + + fn peek(&self, default: CharClass) -> CharClass { + if self.chunk_off > 0 { + WORD_CLASSIFIER[self.chunk[self.chunk_off - 1] as usize] + } else { + default + } + } + + fn next(&mut self) { + self.chunk_off -= 1; + } + + fn offset(&self) -> usize { + self.offset - self.chunk.len() + self.chunk_off + } +} + +pub fn newlines_forward( + text: &[u8], + mut offset: usize, + mut line: CoordType, + line_stop: CoordType, +) -> (usize, CoordType) { + // Leaving the cursor at the beginning of the current line when the limit + // is 0 makes this function behave identical to ucd_newlines_backward. + if line >= line_stop { + return newlines_backward(text, offset, line, line_stop); + } + + let len = text.len(); + offset = offset.min(len); + + loop { + offset = memchr2(b'\r', b'\n', text, offset); + if offset >= len { + break; + } + + let ch = text[offset]; + offset += 1; + if ch == b'\r' && offset != len && text[offset] == b'\n' { + offset += 1; + } + + line += 1; + if line >= line_stop { + break; + } + } + + (offset, line) +} + +// Seeks to the start of the given line. +// No matter what parameters are given, it only returns an offset at the start of a line. +// Put differently, even if `line == line_stop`, it'll seek backward to the line start. 
+pub fn newlines_backward( + text: &[u8], + mut offset: usize, + mut line: CoordType, + line_stop: CoordType, +) -> (usize, CoordType) { + offset = offset.min(text.len()); + + loop { + offset = match memrchr2(b'\r', b'\n', text, offset) { + Some(i) => i, + None => return (0, line), + }; + if line <= line_stop { + // +1: Past the newline, at the start of the current line. + return (offset + 1, line); + } + + if text[offset] == b'\n' && offset != 0 && text[offset - 1] == b'\r' { + offset -= 1; + } + + line -= 1; + } +} + +pub fn strip_newline(mut text: &[u8]) -> &[u8] { + // Rust generates surprisingly tight assembly for this. + if text.last() == Some(&b'\n') { + text = &text[..text.len() - 1]; + } + if text.last() == Some(&b'\r') { + text = &text[..text.len() - 1]; + } + text +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_measure_forward_newline_start() { + let cursor = + MeasurementConfig::new(&"foo\nbar".as_bytes()).goto_visual(Point { x: 0, y: 1 }); + assert_eq!(cursor.offset, 4); + assert_eq!(cursor.logical_pos, Point { x: 0, y: 1 }); + assert_eq!(cursor.visual_pos, Point { x: 0, y: 1 }); + } + + #[test] + fn test_measure_forward_clipped_wide_char() { + let cursor = MeasurementConfig::new(&"a😶‍🌫️b".as_bytes()).goto_visual(Point { x: 2, y: 0 }); + assert_eq!(cursor.offset, 1); + assert_eq!(cursor.logical_pos, Point { x: 1, y: 0 }); + assert_eq!(cursor.visual_pos, Point { x: 1, y: 0 }); + } + + #[test] + fn test_measure_forward_word_wrap() { + // |foo␣ | + // |bar␣ | + // |baz | + let text = "foo bar \nbaz".as_bytes(); + + let cursor = MeasurementConfig::new(&text) + .with_word_wrap_column(6) + .goto_logical(Point { x: 5, y: 0 }); + assert_eq!(cursor.offset, 5); + assert_eq!(cursor.logical_pos, Point { x: 5, y: 0 }); + assert_eq!(cursor.visual_pos, Point { x: 1, y: 1 }); + + let mut cfg = MeasurementConfig::new(&text).with_word_wrap_column(6); + let cursor = cfg.goto_visual(Point { x: 5, y: 0 }); + assert_eq!(cursor.offset, 4); + 
assert_eq!(cursor.logical_pos, Point { x: 4, y: 0 }); + assert_eq!(cursor.visual_pos, Point { x: 0, y: 1 }); + + let cursor = cfg.goto_visual(Point { x: 0, y: 1 }); + assert_eq!(cursor.offset, 4); + assert_eq!(cursor.logical_pos, Point { x: 4, y: 0 }); + assert_eq!(cursor.visual_pos, Point { x: 0, y: 1 }); + + let cursor = cfg.goto_visual(Point { x: 100, y: 1 }); + assert_eq!(cursor.offset, 8); + assert_eq!(cursor.logical_pos, Point { x: 8, y: 0 }); + assert_eq!(cursor.visual_pos, Point { x: 4, y: 1 }); + + let cursor = cfg.goto_visual(Point { x: 0, y: 2 }); + assert_eq!(cursor.offset, 9); + assert_eq!(cursor.logical_pos, Point { x: 0, y: 1 }); + assert_eq!(cursor.visual_pos, Point { x: 0, y: 2 }); + + let cursor = cfg.goto_visual(Point { x: 100, y: 2 }); + assert_eq!(cursor.offset, 12); + assert_eq!(cursor.logical_pos, Point { x: 3, y: 1 }); + assert_eq!(cursor.visual_pos, Point { x: 3, y: 2 }); + } +} diff --git a/src/ucd_gen.rs b/src/ucd_gen.rs new file mode 100644 index 0000000..1f0e8f7 --- /dev/null +++ b/src/ucd_gen.rs @@ -0,0 +1,1066 @@ +// BEGIN: Generated by grapheme-table-gen on 2025-02-01T16:51:45Z, from Unicode 16.0.0, with --lang=rust --no-ambiguous --line-breaks, 16788 bytes +#[rustfmt::skip] +pub const STAGE0: [u16; 544] = [ + 0x0000, 0x0040, 0x007f, 0x00bf, 0x00ff, 0x013f, 0x017f, 0x0194, 0x0194, 0x01a6, 0x0194, 0x0194, 0x0194, 0x0194, 0x0194, 0x0194, + 0x0194, 0x0194, 0x0194, 0x0194, 0x01e6, 0x0226, 0x024a, 0x024b, 0x024c, 0x0246, 0x0255, 0x0295, 0x0295, 0x0295, 0x0295, 0x02cd, + 0x030d, 0x034d, 0x038d, 0x03cd, 0x040d, 0x0438, 0x0478, 0x049b, 0x04bc, 0x0295, 0x0295, 0x0295, 0x04f4, 0x0534, 0x0194, 0x0194, + 0x0574, 0x05b4, 0x0295, 0x0295, 0x0295, 0x05dd, 0x061d, 0x063d, 0x0295, 0x0663, 0x06a3, 0x06e3, 0x0723, 0x0763, 0x07a3, 0x07e3, + 0x0194, 0x0194, 0x0194, 0x0194, 0x0194, 0x0194, 0x0194, 0x0194, 0x0194, 0x0194, 0x0194, 0x0194, 0x0194, 0x0194, 0x0194, 0x0194, + 0x0194, 0x0194, 0x0194, 0x0194, 0x0194, 0x0194, 0x0194, 0x0194, 0x0194, 0x0194, 0x0194, 
0x0194, 0x0194, 0x0194, 0x0194, 0x0823, + 0x0194, 0x0194, 0x0194, 0x0194, 0x0194, 0x0194, 0x0194, 0x0194, 0x0194, 0x0194, 0x0194, 0x0194, 0x0194, 0x0194, 0x0194, 0x0194, + 0x0194, 0x0194, 0x0194, 0x0194, 0x0194, 0x0194, 0x0194, 0x0194, 0x0194, 0x0194, 0x0194, 0x0194, 0x0194, 0x0194, 0x0194, 0x0823, + 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, + 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, + 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, + 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, + 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, + 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, + 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, + 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, + 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, + 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, + 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, + 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, + 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, + 0x0295, 
0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, + 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, + 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, + 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, + 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, + 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, + 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, + 0x0863, 0x0873, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, + 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, + 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, + 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, + 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, + 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, 0x0295, +]; +#[rustfmt::skip] +pub const STAGE1: [u16; 2227] = [ + 0x0000, 0x0008, 0x0010, 0x0018, 0x0020, 0x0028, 0x0030, 0x0030, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x003d, 0x0036, 0x0045, 0x0045, 0x004a, 0x0052, 0x005a, 0x0062, 0x0036, 0x0036, 
0x0036, 0x0036, 0x0036, 0x0036, 0x006a, 0x0036, 0x0036, 0x0036, 0x0036, 0x006e, 0x0073, 0x0036, 0x007a, 0x007f, 0x0087, 0x008d, 0x0095, 0x0036, 0x009d, 0x00a5, 0x0036, 0x0036, 0x00aa, 0x00b2, 0x00b9, 0x00be, 0x00c4, 0x0036, 0x0036, 0x00cb, 0x00d3, 0x00d9, + 0x00e1, 0x00e8, 0x00f0, 0x00f8, 0x00fd, 0x0036, 0x0105, 0x010d, 0x0115, 0x011b, 0x0123, 0x012b, 0x0133, 0x0139, 0x0141, 0x0149, 0x0151, 0x0157, 0x015f, 0x0167, 0x016f, 0x0175, 0x017d, 0x0185, 0x018d, 0x0193, 0x019b, 0x01a3, 0x01ab, 0x01b3, 0x01bb, 0x01c2, 0x01ca, 0x01d0, 0x01d8, 0x01e0, 0x01e8, 0x01ee, 0x01f6, 0x01fe, 0x0206, 0x020c, 0x0214, 0x021c, 0x0224, 0x022b, 0x0233, 0x023b, 0x0241, 0x0245, 0x024d, 0x0241, 0x0241, 0x0254, 0x025c, 0x0241, 0x0264, 0x026c, 0x0070, 0x0274, 0x027c, 0x0283, 0x028b, 0x0241, + 0x0292, 0x029a, 0x02a2, 0x02aa, 0x0036, 0x02b2, 0x0036, 0x02ba, 0x02ba, 0x02ba, 0x02c2, 0x02c2, 0x02c8, 0x02ca, 0x02ca, 0x0036, 0x0036, 0x02d2, 0x0036, 0x02da, 0x02de, 0x02e6, 0x0036, 0x02ec, 0x0036, 0x02f2, 0x02fa, 0x0302, 0x0036, 0x0036, 0x030a, 0x0312, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x031a, 0x0036, 0x0036, 0x0322, 0x032a, 0x0332, 0x033a, 0x0342, 0x0241, 0x0347, 0x034f, 0x0357, 0x035f, + 0x0036, 0x0036, 0x0367, 0x036f, 0x0375, 0x0036, 0x0379, 0x0037, 0x0381, 0x0389, 0x0241, 0x0241, 0x0241, 0x038d, 0x0036, 0x0395, 0x0241, 0x039d, 0x03a5, 0x03ad, 0x03b4, 0x03b9, 0x0241, 0x03c1, 0x03c4, 0x03cc, 0x03d4, 0x03dc, 0x03e4, 0x0241, 0x03ec, 0x0036, 0x03f3, 0x03fb, 0x0402, 0x00f8, 0x040a, 0x0412, 0x041a, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0422, 0x0426, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x030a, 0x0036, 0x042e, 0x0436, 0x0036, 0x043e, 0x0442, 0x044a, 0x0452, + 0x045a, 0x0462, 0x046a, 0x0472, 0x047a, 0x0482, 0x0486, 0x048e, 0x0496, 0x049d, 0x04a5, 0x04ac, 0x04b3, 0x04b7, 0x0036, 0x04bf, 0x04c7, 0x04cf, 0x04d7, 0x04df, 0x04e6, 0x0036, 0x04ee, 0x04f4, 0x04fb, 
0x0036, 0x0036, 0x0501, 0x0036, 0x0506, 0x050c, 0x0036, 0x0513, 0x051b, 0x0241, 0x0241, 0x0241, 0x0523, 0x0524, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x052c, 0x0534, 0x053c, 0x0544, 0x054c, 0x0554, 0x055c, 0x0564, 0x056c, 0x0574, 0x057c, 0x0584, 0x058b, 0x0592, 0x059a, 0x05a0, 0x05a8, 0x05b0, 0x05b7, 0x0036, + 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x05bb, 0x0036, 0x0036, 0x05c3, 0x0036, 0x05ca, 0x05d1, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x05d9, 0x0036, 0x05e1, 0x05e8, 0x05ee, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x05f4, 0x0036, 0x02b2, 0x0036, 0x05fc, 0x0604, 0x060c, 0x060c, 0x0045, 0x0614, 0x061c, 0x0624, 0x0241, 0x062c, 0x0633, 0x0633, 0x0636, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x063e, 0x0644, 0x064c, + 0x0654, 0x065c, 0x0664, 0x066c, 0x0674, 0x0664, 0x067c, 0x0684, 0x0688, 0x0633, 0x0633, 0x068d, 0x0633, 0x0633, 0x0694, 0x069c, 0x0633, 0x06a4, 0x0633, 0x06a8, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, + 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x06b0, 0x06b0, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x06b8, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, + 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x06be, 0x0633, 0x06c5, 0x0402, 0x0036, 0x0036, 0x0036, 0x0036, 
0x0036, 0x0036, 0x0036, 0x0036, 0x06ca, 0x06d2, 0x0036, 0x06da, 0x06e2, 0x0036, 0x0036, 0x06ea, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x06f2, 0x06fa, 0x0702, 0x070a, 0x0036, 0x0712, 0x071a, 0x071d, 0x0724, 0x072c, 0x00d3, 0x0734, 0x073b, 0x0743, 0x074b, 0x074f, 0x0757, 0x075f, 0x0241, 0x0766, 0x076e, 0x0776, 0x0241, 0x077e, 0x0786, 0x078e, 0x0796, 0x079e, + 0x0036, 0x07a3, 0x0036, 0x0036, 0x0036, 0x07ab, 0x07b3, 0x07b4, 0x07b5, 0x07b6, 0x07b7, 0x07b8, 0x07b9, 0x07b3, 0x07b4, 0x07b5, 0x07b6, 0x07b7, 0x07b8, 0x07b9, 0x07b3, 0x07b4, 0x07b5, 0x07b6, 0x07b7, 0x07b8, 0x07b9, 0x07b3, 0x07b4, 0x07b5, 0x07b6, 0x07b7, 0x07b8, 0x07b9, 0x07b3, 0x07b4, 0x07b5, 0x07b6, 0x07b7, 0x07b8, 0x07b9, 0x07b3, 0x07b4, 0x07b5, 0x07b6, 0x07b7, 0x07b8, 0x07b9, 0x07b3, 0x07b4, 0x07b5, 0x07b6, 0x07b7, 0x07b8, 0x07b9, 0x07b3, 0x07b4, 0x07b5, 0x07b6, 0x07b7, 0x07b8, 0x07b9, 0x07b3, 0x07b4, + 0x07b5, 0x07b6, 0x07b7, 0x07b8, 0x07b9, 0x07b3, 0x07b4, 0x07b5, 0x07b6, 0x07b7, 0x07b8, 0x07b9, 0x07b3, 0x07b4, 0x07b5, 0x07b6, 0x07b7, 0x07b8, 0x07c0, 0x07c7, 0x07ca, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, + 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x07d2, 0x07da, 0x07e2, 0x0036, 0x0036, 0x0036, 0x07ea, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x07ef, 0x0036, 0x0036, 0x05e9, 0x0036, 0x07f7, 0x07fb, 0x0803, 0x080b, 0x0812, + 0x081a, 0x0036, 0x0036, 0x0036, 0x0820, 0x0828, 0x0830, 0x0838, 0x0840, 0x0845, 0x084d, 
0x0855, 0x085d, 0x006f, 0x0865, 0x086d, 0x0241, 0x0036, 0x0036, 0x0036, 0x07e4, 0x0875, 0x0878, 0x0036, 0x0036, 0x087e, 0x0240, 0x0886, 0x088a, 0x0241, 0x0241, 0x0241, 0x0241, 0x0892, 0x0036, 0x0895, 0x089d, 0x0036, 0x08a3, 0x00f8, 0x08a7, 0x08af, 0x0036, 0x08b7, 0x0241, 0x0036, 0x0036, 0x0036, 0x0036, 0x0436, 0x0363, 0x08bf, 0x08c5, 0x0036, 0x08ca, 0x0036, 0x08d1, 0x08d5, 0x08da, 0x0036, 0x08e2, 0x0036, 0x0036, 0x0036, + 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0604, 0x0379, 0x08e5, 0x0061, 0x08ed, 0x0241, 0x0241, 0x08f5, 0x08f8, 0x0900, 0x0036, 0x0037, 0x0908, 0x0241, 0x0910, 0x0917, 0x091f, 0x0241, 0x0241, 0x0036, 0x0927, 0x05e9, 0x0036, 0x092f, 0x0936, 0x093e, 0x0036, 0x0036, 0x0241, 0x0036, 0x0946, 0x0036, 0x094e, 0x0438, 0x0956, 0x095c, 0x0964, 0x0241, 0x0241, 0x0036, 0x0036, 0x096c, 0x0241, 0x0036, 0x07e6, 0x0036, 0x0974, 0x0036, 0x097b, 0x00d3, 0x0983, 0x098a, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, + 0x0037, 0x0036, 0x0992, 0x099a, 0x099c, 0x0036, 0x08ca, 0x09a4, 0x0886, 0x09ac, 0x0886, 0x08e4, 0x0604, 0x09b4, 0x09b6, 0x09bd, 0x09c4, 0x03dc, 0x09cc, 0x09d4, 0x09da, 0x09e2, 0x09e9, 0x09f1, 0x09f5, 0x03dc, 0x09fd, 0x0a05, 0x0a0d, 0x005e, 0x0a15, 0x0a1d, 0x0241, 0x0a25, 0x0a2d, 0x0063, 0x0a35, 0x0a3d, 0x0a3f, 0x0a47, 0x0a4f, 0x0241, 0x0a55, 0x0a5d, 0x0a65, 0x0036, 0x0a6d, 0x0a75, 0x0a7d, 0x0036, 0x0a85, 0x0a8d, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0036, 0x0a95, 0x0a9d, 0x0241, 0x0036, 0x0aa5, 0x0aad, + 0x0ab5, 0x0036, 0x0abd, 0x0ac5, 0x0acc, 0x0acd, 0x0ad5, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0036, 0x0add, 0x0241, 0x0241, 0x0241, 0x0036, 0x0036, 0x0ae5, 0x0241, 0x0aed, 0x0af5, 0x0241, 0x0241, 0x05eb, 0x0afd, 0x0b05, 0x0b0d, 0x0b11, 0x0b19, 0x0036, 0x0b20, 0x0b28, 0x0036, 0x0367, 0x0b30, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0036, 0x0b38, 0x0b40, 0x0b45, 0x0b4d, 0x0b54, 0x0b59, 0x0b5f, 0x0241, 0x0241, 0x0b67, 0x0b6b, 0x0b73, 0x0b7b, 0x0b81, 0x0b89, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, + 0x0241, 0x0241, 0x0241, 0x0241, 
0x0b8d, 0x0b95, 0x0b98, 0x0ba0, 0x0241, 0x0241, 0x0ba7, 0x0baf, 0x0bb7, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0302, 0x0241, 0x0241, 0x0241, 0x0036, 0x0036, 0x0036, 0x0bbf, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0bc7, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, + 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0886, 0x0036, 0x0036, 0x07e6, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, + 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0bcf, 0x0036, 0x0bd7, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0bda, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0be1, 0x0be9, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, + 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x07e4, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0bf1, 0x0036, 0x0036, 0x0036, 0x0bf8, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0bfa, 
0x0c02, 0x0241, 0x0241, + 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, + 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0367, 0x0037, 0x0c0a, 0x0036, 0x0037, 0x0363, 0x0c0f, 0x0036, 0x0c17, 0x0c1e, 0x0c26, 0x08e3, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0036, 0x0c2e, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0036, 0x0036, 0x0c36, 0x0241, 0x0241, 0x0241, 0x0036, 0x0036, 0x0c3e, 0x0c43, 0x0c49, 0x0241, 0x0241, 0x0c51, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, + 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0635, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, + 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x06b0, 0x06b0, 0x06b0, 0x06b0, 0x06b0, 0x06b0, 0x06b0, 0x06b0, 0x06b0, 0x06b0, 0x06b0, 0x06b0, 0x06b0, 0x06b0, 0x0c59, 0x0c5f, 0x0c67, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 
0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, + 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0c6b, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0c73, 0x0c78, 0x0c7f, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0634, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, + 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0036, 0x0036, 0x0036, 0x0c87, 0x0c8c, 0x0c94, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, + 0x0241, 0x0241, 0x0241, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0c9c, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x08e2, 0x0241, 0x0241, 0x0045, 0x0ca4, 0x0cab, 0x0036, 0x0036, 0x0036, 0x0bc7, 0x0241, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0379, 0x0036, 0x0cb2, 0x0036, 0x0cb9, 0x0cc1, 0x0cc7, 0x0036, 0x051b, 0x0036, 0x0036, 0x0ccf, 0x0241, 0x0241, 0x0241, 0x08e2, 0x08e2, 0x06b0, 0x06b0, 0x0cd7, 0x0cdf, 0x0241, + 0x0241, 0x0241, 0x0241, 0x0036, 0x0036, 0x043e, 0x0036, 0x0ce7, 0x0cef, 0x0cf7, 0x0036, 0x0cfe, 0x0cf9, 0x0d06, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0d0d, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0d12, 0x0d16, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 
0x0036, 0x0036, 0x0036, 0x0036, 0x0045, 0x0d1e, 0x0045, 0x0d25, 0x0d2c, 0x0d34, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, + 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0037, 0x0d3b, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0d43, 0x0d4b, 0x0036, 0x0d50, 0x0d55, 0x0241, 0x0241, 0x0241, 0x0036, 0x0d5d, 0x0d65, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0886, 0x0d6d, 0x0036, 0x0d75, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, + 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0886, 0x0d7d, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0886, 0x0d85, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0d8d, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0036, 0x0d95, 0x0241, 0x0036, 0x0036, 0x0d9d, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, + 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0da5, 0x0036, 0x0daa, 0x0241, 0x0241, 0x0db2, 0x0436, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0cf7, 0x0dba, 0x0dc2, 0x0dca, 0x0dd2, 0x0dda, 0x0241, 0x0b34, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0de2, 0x0de4, 0x0de4, 0x0de4, 0x0de4, 0x0de4, 0x0de9, 0x0de4, 0x0df1, 0x0df6, 0x0241, 0x0dfc, 0x0e04, 0x0e0b, 0x0de4, 0x0e12, 0x0e1a, 0x0e21, 0x0e29, 0x0e31, 0x0de4, 0x0de4, 0x0de4, 0x0de4, 0x0e39, 0x0e41, 0x0e39, 0x0e47, 0x0e4f, + 0x0e57, 0x0e5f, 0x0e67, 0x0e39, 0x0e6f, 0x0e77, 0x0e39, 0x0e39, 0x0e7f, 0x0e39, 0x0e84, 0x0e8c, 0x0e93, 0x0e9b, 0x0ea1, 0x0ea8, 0x0de2, 0x0eae, 0x0eb5, 0x0e39, 0x0e39, 0x0ebc, 0x0ec0, 0x0e39, 0x0e39, 0x0ec8, 0x0ed0, 0x0036, 0x0036, 0x0036, 0x0ed8, 0x0036, 0x0036, 0x0ee0, 0x0ee8, 0x0ef0, 0x0036, 0x0ef6, 0x0036, 0x0efe, 
0x0f03, 0x0f0b, 0x0f0c, 0x0f14, 0x0f17, 0x0f1e, 0x0e39, 0x0e39, 0x0e39, 0x0e39, 0x0e39, 0x0f26, 0x0f26, 0x0f29, 0x0f2e, 0x0f36, 0x0e39, 0x0f3d, 0x0f45, 0x0036, 0x0036, 0x0036, 0x0036, 0x0031, + 0x0036, 0x0036, 0x0c9c, 0x0de4, 0x0de4, 0x0de4, 0x0de4, 0x0de4, 0x0de4, 0x0de4, 0x0de4, 0x0de4, 0x0de4, 0x0de4, 0x0de4, 0x0de4, 0x0de4, 0x0de4, 0x0de4, 0x0de4, 0x0de4, 0x0de4, 0x0de4, 0x0de4, 0x0de4, 0x0de4, 0x0de4, 0x0de4, 0x0de4, 0x0de4, 0x0de4, 0x0de4, 0x0de4, 0x0de4, 0x0f4c, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, + 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0633, 0x0f54, 0x0f5c, 0x0045, 0x0045, 0x0045, 0x0020, 0x0020, 0x0020, 0x0020, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0045, 0x0f64, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, + 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, +]; +#[rustfmt::skip] +pub const STAGE2: [u16; 3948] = [ + 0x0000, 0x0000, 0x0003, 0x0006, 0x0000, 0x0000, 0x0000, 0x0000, + 0x000a, 0x000e, 0x0012, 0x0016, 0x001a, 0x001a, 0x001c, 0x0020, + 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0025, 0x0029, + 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0025, 0x002d, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, + 0x0031, 0x0034, 0x0037, 0x003b, 0x003f, 0x0043, 0x0045, 0x0049, + 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x004d, 0x0024, 0x0024, + 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x004d, 0x0051, + 0x0055, 0x0056, 0x0024, 0x005a, 0x005e, 0x0062, 0x0062, 0x0062, + 0x0062, 0x0062, 0x0062, 0x0062, 0x0062, 0x0063, 0x0062, 0x0062, + 0x0062, 0x0066, 0x0067, 0x0062, 0x0062, 0x0062, 0x0024, 0x0024, + 0x006b, 0x006d, 0x005a, 0x0024, 0x004d, 0x0070, 0x0024, 0x0024, + 0x0024, 0x0024, 0x0072, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, + 0x0024, 0x0024, 0x0075, 0x0062, 0x0078, 0x0024, 0x0024, 0x0024, + 0x0024, 0x0024, 0x0056, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, + 0x004d, 0x0056, 0x0024, 0x0024, 0x007b, 0x007e, 0x0082, 0x0062, + 0x0062, 0x0062, 0x0062, 0x0062, 0x0062, 0x0062, 0x0084, 0x0077, + 0x0087, 0x005a, 0x005a, 0x0024, 0x0024, 0x0024, 0x0024, 0x004d, + 0x005b, 0x0024, 0x0044, 0x005a, 0x005a, 0x008b, 0x008d, 0x0090, + 0x001e, 0x0062, 0x0062, 0x0094, 0x0096, 0x0024, 0x0024, 0x0075, + 0x0062, 0x0062, 0x0062, 0x0062, 0x0062, 0x001a, 0x001a, 0x009a, + 0x009d, 0x00a1, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x00a5, + 0x0062, 0x00a8, 0x0062, 0x00ab, 0x00ae, 0x0078, 0x001a, 0x001a, + 0x00b2, 0x0024, 0x0024, 0x0024, 0x00b4, 0x00a0, 0x0024, 0x0024, + 0x0024, 0x0024, 0x0062, 0x0062, 0x0062, 0x0062, 0x00b8, 0x0056, + 0x0024, 0x0024, 0x0024, 0x0024, 0x0076, 0x0062, 0x0062, 0x00bc, + 0x005a, 0x005a, 0x005a, 0x001a, 0x001a, 0x00b2, 0x0024, 0x0024, + 0x0024, 0x0024, 0x0024, 0x0075, 0x0062, 0x0062, 0x0024, 0x00c0, + 0x00c3, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0076, 0x00c7, + 0x0062, 0x00c9, 0x00c9, 0x00cb, 0x0024, 0x0024, 0x0024, 0x004d, + 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x00c9, 0x005c, + 0x0024, 0x0024, 0x004d, 0x005a, 0x0024, 0x0024, 0x0024, 0x0024, + 0x004d, 0x00cf, 0x00d1, 0x0062, 0x0062, 0x0024, 0x0024, 0x0076, + 0x0062, 0x0062, 0x0062, 0x0062, 0x0062, 0x00d4, 0x0062, 0x0062, + 0x0062, 0x0062, 0x0062, 0x0062, 0x0062, 0x00d7, 0x0024, 
0x0024, + 0x0024, 0x0024, 0x00db, 0x00dc, 0x00dc, 0x00dc, 0x00dc, 0x00dc, + 0x00dc, 0x00de, 0x00e2, 0x00e5, 0x0062, 0x00e8, 0x00eb, 0x00c9, + 0x0062, 0x00dc, 0x00dc, 0x0076, 0x00ef, 0x001a, 0x001a, 0x0024, + 0x0024, 0x00dc, 0x00dc, 0x00f3, 0x0056, 0x0024, 0x00f7, 0x00f7, + 0x00db, 0x00dc, 0x00dc, 0x00fb, 0x00dc, 0x00fe, 0x0101, 0x0103, + 0x00e2, 0x00e5, 0x0107, 0x010a, 0x010d, 0x005a, 0x0110, 0x005a, + 0x00fd, 0x0076, 0x0114, 0x001a, 0x001a, 0x0118, 0x0024, 0x011c, + 0x0120, 0x0123, 0x0056, 0x004d, 0x005b, 0x00f7, 0x0024, 0x0024, + 0x0024, 0x0070, 0x0024, 0x0070, 0x0071, 0x0058, 0x0127, 0x012a, + 0x00d1, 0x012c, 0x00cb, 0x0106, 0x005a, 0x0056, 0x0130, 0x005a, + 0x0114, 0x001a, 0x001a, 0x0078, 0x0134, 0x005a, 0x005a, 0x0123, + 0x0056, 0x0024, 0x0072, 0x0072, 0x00db, 0x00dc, 0x00dc, 0x00fb, + 0x00dc, 0x00fb, 0x0137, 0x0103, 0x00e2, 0x00e5, 0x00b9, 0x013b, + 0x013e, 0x0044, 0x005a, 0x005a, 0x005a, 0x0076, 0x0114, 0x001a, + 0x001a, 0x0142, 0x005a, 0x0145, 0x0062, 0x0149, 0x0056, 0x0024, + 0x00f7, 0x00f7, 0x00db, 0x00dc, 0x00dc, 0x00fb, 0x00dc, 0x00fb, + 0x0137, 0x0103, 0x014d, 0x00e5, 0x0107, 0x010a, 0x013e, 0x005a, + 0x0123, 0x005a, 0x00fd, 0x0076, 0x0114, 0x001a, 0x001a, 0x0151, + 0x0024, 0x005a, 0x005a, 0x012d, 0x0056, 0x004d, 0x006b, 0x0070, + 0x0058, 0x0071, 0x0070, 0x005b, 0x0044, 0x004d, 0x006b, 0x0024, + 0x0024, 0x0058, 0x0155, 0x0159, 0x0155, 0x015b, 0x015e, 0x0044, + 0x0110, 0x005a, 0x005a, 0x0114, 0x001a, 0x001a, 0x0024, 0x0024, + 0x0162, 0x005a, 0x00e8, 0x00a1, 0x0024, 0x0070, 0x0070, 0x00db, + 0x00dc, 0x00dc, 0x00fb, 0x00dc, 0x00dc, 0x00dc, 0x0103, 0x00ae, + 0x00e8, 0x0166, 0x0122, 0x0169, 0x005a, 0x016c, 0x0170, 0x0173, + 0x0076, 0x0114, 0x001a, 0x001a, 0x005a, 0x0052, 0x0024, 0x0024, + 0x00f3, 0x0177, 0x0024, 0x0070, 0x0070, 0x0024, 0x0024, 0x0024, + 0x0070, 0x0024, 0x0024, 0x0056, 0x0058, 0x014d, 0x017b, 0x017e, + 0x015b, 0x00cb, 0x005a, 0x0182, 0x005a, 0x0071, 0x0076, 0x0114, + 0x001a, 0x001a, 0x0185, 0x005a, 0x005a, 0x005a, 0x00e7, 0x0024, + 0x0024, 
0x0070, 0x0070, 0x00db, 0x00dc, 0x00dc, 0x00dc, 0x00dc, + 0x00dc, 0x00dc, 0x00dd, 0x00e2, 0x00e5, 0x0127, 0x015b, 0x0188, + 0x005a, 0x0178, 0x0024, 0x0024, 0x0076, 0x0114, 0x001a, 0x001a, + 0x0024, 0x0024, 0x018b, 0x0024, 0x0149, 0x0056, 0x0024, 0x0024, + 0x0024, 0x004d, 0x006b, 0x0024, 0x0024, 0x0024, 0x0024, 0x0072, + 0x0024, 0x0024, 0x0173, 0x0024, 0x004d, 0x0105, 0x0110, 0x00e4, + 0x018f, 0x017b, 0x017b, 0x005a, 0x0114, 0x001a, 0x001a, 0x0155, + 0x0044, 0x005a, 0x005a, 0x005a, 0x005a, 0x005a, 0x005a, 0x005a, + 0x005a, 0x0190, 0x0062, 0x00b8, 0x0194, 0x005a, 0x00d1, 0x0062, + 0x0198, 0x001a, 0x001a, 0x019c, 0x005a, 0x005a, 0x005a, 0x005a, + 0x0190, 0x0062, 0x0062, 0x01a0, 0x005a, 0x005a, 0x0062, 0x00b8, + 0x001a, 0x001a, 0x01a4, 0x005a, 0x01a8, 0x01ab, 0x01af, 0x01b3, + 0x01b5, 0x0023, 0x0078, 0x0024, 0x001a, 0x001a, 0x00b2, 0x0024, + 0x0024, 0x01b9, 0x01bb, 0x01bd, 0x0024, 0x0024, 0x0024, 0x0044, + 0x0082, 0x0062, 0x0062, 0x01c1, 0x0062, 0x0085, 0x0024, 0x00c9, + 0x0062, 0x0062, 0x0082, 0x0062, 0x0062, 0x0062, 0x0062, 0x0062, + 0x0062, 0x0062, 0x01c5, 0x0024, 0x009f, 0x0024, 0x0070, 0x01c9, + 0x0024, 0x01cd, 0x005a, 0x005a, 0x005a, 0x0082, 0x01d1, 0x0062, + 0x0123, 0x012a, 0x001a, 0x001a, 0x019c, 0x0024, 0x005a, 0x0155, + 0x00cb, 0x00d2, 0x01a0, 0x005a, 0x005a, 0x005a, 0x0082, 0x01a0, + 0x005a, 0x005a, 0x0105, 0x012a, 0x005a, 0x0106, 0x001a, 0x001a, + 0x01a4, 0x0106, 0x0024, 0x0072, 0x005a, 0x0173, 0x0024, 0x0024, + 0x0024, 0x0024, 0x01d5, 0x01d5, 0x01d5, 0x01d5, 0x01d5, 0x01d5, + 0x01d5, 0x01d5, 0x01d9, 0x01d9, 0x01d9, 0x01d9, 0x01d9, 0x01d9, + 0x01d9, 0x01d9, 0x01dd, 0x01dd, 0x01dd, 0x01dd, 0x01dd, 0x01dd, + 0x01dd, 0x01dd, 0x0024, 0x0024, 0x0070, 0x0058, 0x0024, 0x004d, + 0x0070, 0x0058, 0x0024, 0x0024, 0x0070, 0x0058, 0x0024, 0x0024, + 0x0024, 0x0024, 0x0070, 0x0058, 0x0024, 0x004d, 0x0070, 0x0058, + 0x0024, 0x0024, 0x0024, 0x004d, 0x0024, 0x0024, 0x0024, 0x0024, + 0x0070, 0x0058, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, + 0x004d, 0x0082, 0x01e1, 
0x0024, 0x0024, 0x0024, 0x0024, 0x0024, + 0x0024, 0x0044, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, + 0x0058, 0x005a, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0058, + 0x0024, 0x0058, 0x01e2, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, + 0x0024, 0x0024, 0x01e2, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, + 0x0025, 0x01e6, 0x0024, 0x0024, 0x01ea, 0x01ed, 0x0024, 0x0024, + 0x0044, 0x005a, 0x0024, 0x0024, 0x0024, 0x0024, 0x0076, 0x01f1, + 0x005a, 0x005b, 0x0024, 0x0024, 0x0024, 0x0024, 0x0076, 0x01f5, + 0x005a, 0x005a, 0x0024, 0x0024, 0x0024, 0x0024, 0x0076, 0x005a, + 0x005a, 0x005a, 0x0024, 0x0024, 0x0024, 0x0070, 0x01f9, 0x005a, + 0x005a, 0x005a, 0x005a, 0x005a, 0x0124, 0x0062, 0x00e7, 0x017b, + 0x0157, 0x00e5, 0x0062, 0x0062, 0x01fd, 0x0201, 0x0106, 0x001a, + 0x001a, 0x01a4, 0x005a, 0x0024, 0x0024, 0x0058, 0x005a, 0x0205, + 0x0209, 0x020d, 0x0210, 0x001a, 0x001a, 0x01a4, 0x005a, 0x0024, + 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0044, 0x005a, 0x0024, + 0x0077, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0134, + 0x005a, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0058, 0x005a, + 0x005a, 0x00d7, 0x0214, 0x00e8, 0x005a, 0x0157, 0x017b, 0x00e5, + 0x005a, 0x0044, 0x0098, 0x001a, 0x001a, 0x005a, 0x005a, 0x005a, + 0x005a, 0x001a, 0x001a, 0x0218, 0x005a, 0x0024, 0x0024, 0x0024, + 0x0024, 0x0024, 0x0075, 0x014a, 0x006b, 0x005a, 0x005a, 0x005a, + 0x005a, 0x005a, 0x021b, 0x0062, 0x00b8, 0x018f, 0x0082, 0x0062, + 0x00e8, 0x0214, 0x0062, 0x0062, 0x012c, 0x001a, 0x001a, 0x01a4, + 0x005a, 0x001a, 0x001a, 0x01a4, 0x005a, 0x005a, 0x005a, 0x005a, + 0x0062, 0x0062, 0x0062, 0x0062, 0x00b8, 0x005a, 0x005a, 0x005a, + 0x005a, 0x0062, 0x0193, 0x005a, 0x005a, 0x005a, 0x005a, 0x005a, + 0x005a, 0x01d1, 0x00d7, 0x00e8, 0x0157, 0x0193, 0x005a, 0x021f, + 0x005a, 0x005a, 0x021f, 0x0223, 0x0226, 0x0227, 0x0228, 0x0062, + 0x0062, 0x0227, 0x0227, 0x0223, 0x00d8, 0x0024, 0x0024, 0x0024, + 0x0024, 0x0024, 0x0024, 0x0024, 0x022c, 0x00e7, 0x0124, 0x0078, + 0x001a, 0x001a, 0x00b2, 0x0024, 0x005a, 
0x0230, 0x00e7, 0x0233, + 0x00e7, 0x005a, 0x005a, 0x0024, 0x017b, 0x017b, 0x0062, 0x0062, + 0x00e4, 0x0237, 0x023a, 0x001a, 0x001a, 0x01a4, 0x0056, 0x001a, + 0x001a, 0x00b2, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, + 0x0024, 0x01eb, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, + 0x004d, 0x0056, 0x0024, 0x0024, 0x005a, 0x005a, 0x0198, 0x0062, + 0x0062, 0x0062, 0x01d1, 0x0062, 0x00a1, 0x00a0, 0x0024, 0x023e, + 0x0242, 0x005a, 0x0062, 0x0062, 0x0062, 0x0246, 0x0062, 0x0062, + 0x0062, 0x0062, 0x0062, 0x0062, 0x0062, 0x0247, 0x0024, 0x0058, + 0x0024, 0x0058, 0x0024, 0x0024, 0x005d, 0x005d, 0x0024, 0x0024, + 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0058, 0x0024, 0x0024, + 0x0024, 0x0024, 0x0024, 0x0070, 0x0024, 0x0024, 0x0024, 0x006b, + 0x0024, 0x0056, 0x0024, 0x0024, 0x0024, 0x0024, 0x006b, 0x0070, + 0x0024, 0x024b, 0x023a, 0x024f, 0x0253, 0x0257, 0x0251, 0x005b, + 0x025b, 0x025b, 0x006b, 0x025f, 0x0263, 0x0265, 0x0269, 0x0269, + 0x026d, 0x0271, 0x0024, 0x0275, 0x0278, 0x0024, 0x0024, 0x027a, + 0x023a, 0x027e, 0x0282, 0x0285, 0x0062, 0x0062, 0x0058, 0x0056, + 0x0024, 0x0289, 0x0044, 0x0056, 0x0024, 0x0289, 0x0024, 0x0024, + 0x0024, 0x0044, 0x028d, 0x028e, 0x028d, 0x028d, 0x028d, 0x028f, + 0x028e, 0x028f, 0x0291, 0x028d, 0x028d, 0x028d, 0x0062, 0x0062, + 0x0062, 0x0062, 0x01a0, 0x005a, 0x005a, 0x005a, 0x0295, 0x0070, + 0x018b, 0x0024, 0x004d, 0x0299, 0x0024, 0x0024, 0x029c, 0x0024, + 0x004d, 0x0024, 0x0024, 0x0024, 0x029f, 0x0024, 0x0024, 0x0024, + 0x0024, 0x005a, 0x005a, 0x005a, 0x005b, 0x005a, 0x005a, 0x005a, + 0x0024, 0x005a, 0x005a, 0x006b, 0x0024, 0x0024, 0x0070, 0x005a, + 0x005a, 0x02a3, 0x02a5, 0x0024, 0x0024, 0x02a8, 0x0024, 0x0024, + 0x0024, 0x0024, 0x0024, 0x0072, 0x0056, 0x0024, 0x0024, 0x0173, + 0x004d, 0x0071, 0x004d, 0x02ab, 0x0070, 0x0072, 0x0044, 0x0071, + 0x0130, 0x005a, 0x005d, 0x0024, 0x005a, 0x0024, 0x006b, 0x0024, + 0x0024, 0x0056, 0x0056, 0x0072, 0x0024, 0x0024, 0x0024, 0x006b, + 0x005a, 0x0058, 0x0058, 0x0024, 0x0024, 0x0024, 0x0024, 
0x0058, + 0x0058, 0x0024, 0x0024, 0x0024, 0x0070, 0x0070, 0x0024, 0x0070, + 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x004d, 0x0024, 0x0024, + 0x0024, 0x02af, 0x0024, 0x0024, 0x0024, 0x0024, 0x02b3, 0x0024, + 0x0072, 0x0024, 0x02b7, 0x0024, 0x0024, 0x02bb, 0x0024, 0x0024, + 0x0024, 0x0024, 0x0024, 0x02bf, 0x0024, 0x0024, 0x0024, 0x0024, + 0x0024, 0x02c0, 0x0024, 0x0024, 0x0024, 0x0024, 0x02c4, 0x02c7, + 0x02cb, 0x0024, 0x02cf, 0x0024, 0x0024, 0x0058, 0x005a, 0x005a, + 0x005a, 0x005a, 0x005a, 0x0024, 0x0024, 0x004d, 0x005a, 0x005a, + 0x005a, 0x005a, 0x005a, 0x02d3, 0x005a, 0x005a, 0x005a, 0x005a, + 0x005a, 0x005a, 0x005a, 0x005b, 0x005c, 0x005a, 0x02d6, 0x0024, + 0x0058, 0x02da, 0x0024, 0x006b, 0x02dc, 0x0058, 0x0071, 0x0058, + 0x006b, 0x0024, 0x0024, 0x0024, 0x0058, 0x006b, 0x0024, 0x004d, + 0x0024, 0x0024, 0x02c0, 0x02e0, 0x02e4, 0x02e8, 0x02eb, 0x02ed, + 0x02cf, 0x02f1, 0x02f5, 0x02e4, 0x02f9, 0x02f9, 0x02f9, 0x02f9, + 0x02fd, 0x02fd, 0x0301, 0x02f9, 0x0305, 0x02f9, 0x02fd, 0x02fd, + 0x02fd, 0x02f9, 0x02f9, 0x02f9, 0x0309, 0x0309, 0x030d, 0x0309, + 0x02f9, 0x02f9, 0x02f9, 0x02c8, 0x02f9, 0x02d8, 0x0311, 0x0313, + 0x02fa, 0x02f9, 0x02f9, 0x02ed, 0x0317, 0x02f9, 0x02fb, 0x02f9, + 0x02f9, 0x02f9, 0x02f9, 0x031a, 0x02e4, 0x031b, 0x031e, 0x0321, + 0x0324, 0x0328, 0x031d, 0x032c, 0x02e6, 0x02f9, 0x0330, 0x02a3, + 0x0333, 0x0337, 0x033a, 0x033d, 0x02e4, 0x0340, 0x0344, 0x02f7, + 0x02cf, 0x0348, 0x0024, 0x029f, 0x0024, 0x034c, 0x0024, 0x02c0, + 0x02bf, 0x0024, 0x0024, 0x0350, 0x0024, 0x0354, 0x0357, 0x035a, + 0x035e, 0x0361, 0x0364, 0x02f8, 0x02b3, 0x02b3, 0x02b3, 0x0368, + 0x005a, 0x005a, 0x005a, 0x005a, 0x005a, 0x02c4, 0x0024, 0x0024, + 0x029f, 0x0024, 0x0024, 0x0024, 0x034c, 0x0024, 0x0024, 0x0357, + 0x0024, 0x036c, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, + 0x036f, 0x02b3, 0x02b3, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, + 0x02d8, 0x0024, 0x0024, 0x0025, 0x0372, 0x0372, 0x0372, 0x0372, + 0x0372, 0x0376, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, + 0x02b3, 
0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, + 0x02b5, 0x0024, 0x0379, 0x0024, 0x0024, 0x0024, 0x0024, 0x0357, + 0x034c, 0x0024, 0x0024, 0x0024, 0x0024, 0x034c, 0x037d, 0x006b, + 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x006b, 0x0024, 0x0024, + 0x0024, 0x0024, 0x0024, 0x0072, 0x0024, 0x0024, 0x0024, 0x0075, + 0x0078, 0x005a, 0x0380, 0x0383, 0x0024, 0x0024, 0x005a, 0x005b, + 0x0386, 0x005a, 0x005a, 0x00d1, 0x0024, 0x0024, 0x0024, 0x0024, + 0x0024, 0x004d, 0x005a, 0x005a, 0x0024, 0x004d, 0x0024, 0x004d, + 0x0024, 0x004d, 0x0024, 0x004d, 0x0361, 0x0361, 0x0361, 0x038a, + 0x023a, 0x038c, 0x0390, 0x0394, 0x0398, 0x02b3, 0x039a, 0x039c, + 0x038c, 0x01e2, 0x0058, 0x03a0, 0x03a4, 0x023a, 0x03a0, 0x03a2, + 0x0020, 0x03a8, 0x03aa, 0x03ae, 0x03b2, 0x03b2, 0x03b2, 0x03b2, + 0x03b2, 0x03b2, 0x03b4, 0x03b2, 0x03b2, 0x03b2, 0x03b2, 0x03b2, + 0x03b2, 0x03b2, 0x03b2, 0x005a, 0x005a, 0x005a, 0x03b2, 0x03b2, + 0x03b2, 0x03b2, 0x03b2, 0x03b7, 0x005a, 0x005a, 0x005a, 0x005a, + 0x03b2, 0x03b2, 0x03b2, 0x03b2, 0x03bb, 0x03be, 0x03c2, 0x03c2, + 0x03c4, 0x03c2, 0x03c2, 0x03c8, 0x03b2, 0x03b2, 0x03cc, 0x03ce, + 0x03d2, 0x03d5, 0x03d7, 0x03da, 0x03de, 0x03e0, 0x03d5, 0x03b2, + 0x03b2, 0x03b2, 0x03b2, 0x03b2, 0x03d3, 0x03b2, 0x03b2, 0x03b2, + 0x03b2, 0x03b2, 0x03b2, 0x03b2, 0x03d3, 0x03e0, 0x03b2, 0x03d4, + 0x03b2, 0x03e2, 0x03e5, 0x03e8, 0x03ec, 0x03e0, 0x03d5, 0x03b2, + 0x03b2, 0x03b2, 0x03b2, 0x03b2, 0x03d3, 0x03e0, 0x03b2, 0x03d4, + 0x03b2, 0x03ee, 0x03d7, 0x03f2, 0x005a, 0x03b1, 0x03b2, 0x03b2, + 0x03b2, 0x03b2, 0x03b2, 0x03b2, 0x03b1, 0x03b2, 0x03b2, 0x03b2, + 0x03b3, 0x03b2, 0x03b2, 0x03b2, 0x03b2, 0x03b7, 0x005a, 0x03f6, + 0x03fa, 0x03fa, 0x03fa, 0x03fa, 0x03b2, 0x03b2, 0x03b2, 0x03b2, + 0x03b2, 0x03b2, 0x03b2, 0x03b3, 0x03b2, 0x03b2, 0x005a, 0x005a, + 0x03b2, 0x03b2, 0x03b2, 0x03b2, 0x03b2, 0x03fe, 0x0400, 0x03b2, + 0x0313, 0x0313, 0x0313, 0x0313, 0x0313, 0x0313, 0x0313, 0x0313, + 0x03b2, 0x03b2, 0x03b2, 0x03b2, 0x03b2, 0x03be, 0x03b2, 0x03b2, + 0x03b2, 0x03f5, 0x03b2, 
0x03b2, 0x03b2, 0x03b2, 0x03b3, 0x005a, + 0x005a, 0x0024, 0x0024, 0x0024, 0x0024, 0x0404, 0x0024, 0x0024, + 0x0024, 0x0024, 0x001a, 0x001a, 0x00b2, 0x005a, 0x005a, 0x005a, + 0x005a, 0x005a, 0x0024, 0x0024, 0x0024, 0x0075, 0x0198, 0x0062, + 0x0062, 0x0078, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, + 0x0024, 0x0076, 0x0024, 0x0024, 0x0024, 0x0024, 0x0408, 0x023a, + 0x005a, 0x005a, 0x0024, 0x0024, 0x0024, 0x0058, 0x0072, 0x0056, + 0x0024, 0x0044, 0x005a, 0x005a, 0x005a, 0x005a, 0x006b, 0x0024, + 0x0024, 0x0024, 0x009f, 0x009f, 0x0075, 0x0024, 0x0024, 0x0024, + 0x0024, 0x0024, 0x0178, 0x040c, 0x0024, 0x01a0, 0x0024, 0x0024, + 0x0410, 0x005a, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0414, + 0x005a, 0x005a, 0x0418, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, + 0x0024, 0x0024, 0x017b, 0x017b, 0x017b, 0x00cb, 0x005a, 0x021f, + 0x001a, 0x001a, 0x01a4, 0x005a, 0x0062, 0x0062, 0x0062, 0x0062, + 0x0078, 0x0024, 0x0024, 0x041c, 0x0024, 0x0076, 0x0062, 0x01fb, + 0x0024, 0x0024, 0x0024, 0x0024, 0x0075, 0x0062, 0x0062, 0x00e7, + 0x005a, 0x005a, 0x005b, 0x01d5, 0x01d5, 0x01d5, 0x01d5, 0x01d5, + 0x01d5, 0x01d5, 0x0420, 0x00d7, 0x005a, 0x005a, 0x005a, 0x005a, + 0x005a, 0x005a, 0x005a, 0x00d1, 0x00e4, 0x00e7, 0x00e7, 0x0424, + 0x0425, 0x0225, 0x0429, 0x005a, 0x005a, 0x005a, 0x042d, 0x005a, + 0x0106, 0x005a, 0x005a, 0x001a, 0x001a, 0x01a4, 0x005a, 0x005a, + 0x0082, 0x00d7, 0x040c, 0x012a, 0x005a, 0x005a, 0x023b, 0x023a, + 0x023a, 0x01f1, 0x005a, 0x005a, 0x005a, 0x0223, 0x005a, 0x005a, + 0x005a, 0x005a, 0x005a, 0x005a, 0x005a, 0x01a0, 0x005a, 0x005a, + 0x005a, 0x005a, 0x0122, 0x012c, 0x01a0, 0x00d2, 0x0106, 0x005a, + 0x005a, 0x005a, 0x005a, 0x005a, 0x005a, 0x005a, 0x0024, 0x0024, + 0x0178, 0x00e7, 0x01ed, 0x0431, 0x005a, 0x005a, 0x0056, 0x004d, + 0x0056, 0x004d, 0x0056, 0x004d, 0x005a, 0x005a, 0x0024, 0x004d, + 0x0024, 0x004d, 0x0024, 0x0024, 0x0024, 0x0024, 0x005a, 0x0024, + 0x0024, 0x0024, 0x0024, 0x0178, 0x0158, 0x0435, 0x015e, 0x001a, + 0x001a, 0x01a4, 0x005a, 0x0439, 0x043a, 
0x043a, 0x043a, 0x043a, + 0x043a, 0x043a, 0x0439, 0x043a, 0x043a, 0x043a, 0x043a, 0x043a, + 0x043a, 0x005a, 0x005a, 0x005a, 0x01d9, 0x01d9, 0x01d9, 0x01d9, + 0x043e, 0x0441, 0x01dd, 0x01dd, 0x01dd, 0x01dd, 0x01dd, 0x01dd, + 0x01dd, 0x005a, 0x0024, 0x004d, 0x005a, 0x005a, 0x005b, 0x0024, + 0x005a, 0x0133, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x004d, + 0x0024, 0x0130, 0x0072, 0x0070, 0x0024, 0x0024, 0x0024, 0x0024, + 0x0024, 0x0024, 0x004d, 0x005a, 0x005a, 0x005a, 0x005b, 0x0024, + 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0445, 0x0024, + 0x0024, 0x005a, 0x005b, 0x005a, 0x005a, 0x005a, 0x005a, 0x0024, + 0x0024, 0x0024, 0x0449, 0x0062, 0x0062, 0x0062, 0x044d, 0x0451, + 0x0454, 0x0458, 0x005a, 0x045c, 0x045e, 0x045d, 0x045f, 0x03b2, + 0x03c1, 0x0463, 0x0463, 0x0467, 0x046b, 0x03b2, 0x046f, 0x0473, + 0x03c1, 0x03c3, 0x03b2, 0x03b3, 0x0477, 0x005a, 0x0024, 0x0070, + 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x047b, + 0x047f, 0x0483, 0x03c4, 0x0487, 0x03b2, 0x03b2, 0x048a, 0x048e, + 0x03b2, 0x03b2, 0x03b2, 0x03b2, 0x03b2, 0x03b2, 0x0492, 0x0488, + 0x03b2, 0x03b2, 0x03b2, 0x03b2, 0x03b2, 0x03b2, 0x0492, 0x0496, + 0x049a, 0x049d, 0x005a, 0x005a, 0x04a0, 0x0227, 0x0227, 0x0227, + 0x0227, 0x0227, 0x0227, 0x0227, 0x04a2, 0x0227, 0x0227, 0x0227, + 0x0227, 0x0227, 0x0227, 0x0227, 0x04a6, 0x042d, 0x0227, 0x042d, + 0x0227, 0x042d, 0x0227, 0x042d, 0x04a8, 0x04ac, 0x04af, 0x0024, + 0x004d, 0x0000, 0x0000, 0x0264, 0x005a, 0x0024, 0x004d, 0x0024, + 0x0024, 0x0024, 0x0024, 0x004d, 0x0072, 0x0024, 0x0024, 0x0024, + 0x0058, 0x0024, 0x0024, 0x0024, 0x0058, 0x04b3, 0x005b, 0x0024, + 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x005b, 0x0024, 0x0024, + 0x0024, 0x004d, 0x0024, 0x0024, 0x0024, 0x0044, 0x005a, 0x005a, + 0x005a, 0x005a, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, + 0x0024, 0x04b7, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, + 0x0024, 0x0044, 0x005a, 0x005a, 0x005a, 0x00a1, 0x0024, 0x0024, + 0x0024, 0x0024, 0x0024, 0x0024, 0x005a, 0x005a, 0x0056, 
0x0024, + 0x0024, 0x0024, 0x0024, 0x0024, 0x0076, 0x00b8, 0x005a, 0x0024, + 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x04bb, 0x0024, + 0x005a, 0x0024, 0x0024, 0x01e2, 0x0058, 0x005a, 0x005a, 0x0024, + 0x0024, 0x0024, 0x0024, 0x0024, 0x005a, 0x0024, 0x0024, 0x0024, + 0x0024, 0x0024, 0x0024, 0x0024, 0x005a, 0x005a, 0x0024, 0x0024, + 0x0024, 0x0024, 0x005a, 0x005a, 0x005b, 0x0024, 0x0024, 0x004d, + 0x0024, 0x004d, 0x0072, 0x0024, 0x0024, 0x0024, 0x0072, 0x0024, + 0x0072, 0x0044, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x005a, + 0x005a, 0x005a, 0x005a, 0x005a, 0x005a, 0x0024, 0x0024, 0x0024, + 0x0024, 0x0070, 0x0024, 0x004d, 0x005a, 0x0024, 0x0058, 0x0070, + 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0072, 0x0044, 0x00f7, + 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x04bb, 0x0024, 0x0024, + 0x005a, 0x005b, 0x0024, 0x0024, 0x005a, 0x005a, 0x005a, 0x005a, + 0x0024, 0x0024, 0x0024, 0x0024, 0x004d, 0x0058, 0x005b, 0x0024, + 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0237, 0x0024, + 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0058, 0x005b, 0x0024, + 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x005a, 0x0024, 0x00c9, + 0x016c, 0x005a, 0x0062, 0x0024, 0x0056, 0x0056, 0x0024, 0x0024, + 0x0024, 0x0024, 0x0024, 0x0058, 0x00b8, 0x00d1, 0x0024, 0x0024, + 0x0044, 0x005a, 0x023a, 0x023a, 0x0044, 0x005a, 0x0024, 0x04bf, + 0x005b, 0x0024, 0x023a, 0x04c3, 0x005a, 0x005a, 0x0024, 0x0024, + 0x0024, 0x0024, 0x0024, 0x0058, 0x0239, 0x023a, 0x0024, 0x0024, + 0x0024, 0x0024, 0x004d, 0x005a, 0x0024, 0x0024, 0x0024, 0x0024, + 0x0058, 0x005a, 0x0056, 0x0044, 0x005a, 0x005a, 0x0056, 0x0024, + 0x005a, 0x005a, 0x005a, 0x005a, 0x0024, 0x0024, 0x0044, 0x005a, + 0x005a, 0x005a, 0x005a, 0x005a, 0x0024, 0x0024, 0x0024, 0x0024, + 0x004d, 0x005a, 0x006b, 0x0024, 0x0062, 0x005a, 0x005a, 0x001a, + 0x001a, 0x01a4, 0x005a, 0x0024, 0x0058, 0x0082, 0x04c7, 0x0024, + 0x0024, 0x0024, 0x0024, 0x0058, 0x005a, 0x006b, 0x005a, 0x005a, + 0x005a, 0x005a, 0x0024, 0x0024, 0x04ca, 0x04cd, 0x0058, 0x005a, + 0x005a, 
0x005a, 0x006b, 0x0044, 0x005a, 0x005a, 0x005a, 0x005a, + 0x005a, 0x005a, 0x005a, 0x0062, 0x0024, 0x0076, 0x0062, 0x0062, + 0x00a1, 0x0024, 0x0058, 0x005a, 0x0076, 0x0078, 0x0058, 0x005a, + 0x005a, 0x005a, 0x005a, 0x005a, 0x021c, 0x005a, 0x005a, 0x005a, + 0x005a, 0x005a, 0x005a, 0x005a, 0x0062, 0x0062, 0x0083, 0x0226, + 0x04a7, 0x042d, 0x0227, 0x0227, 0x0227, 0x04a7, 0x005a, 0x005a, + 0x012c, 0x01a0, 0x005a, 0x04cf, 0x0024, 0x0024, 0x0024, 0x0024, + 0x0214, 0x00d7, 0x0241, 0x04d3, 0x019e, 0x005a, 0x005a, 0x04d7, + 0x0024, 0x0024, 0x0024, 0x0024, 0x0044, 0x005a, 0x001a, 0x001a, + 0x01a4, 0x005a, 0x0198, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, + 0x0024, 0x0024, 0x0075, 0x0062, 0x00e5, 0x0062, 0x04db, 0x001a, + 0x001a, 0x023a, 0x04df, 0x005a, 0x005a, 0x0024, 0x0024, 0x0024, + 0x0024, 0x0075, 0x024b, 0x005a, 0x005a, 0x0024, 0x0024, 0x0024, + 0x0024, 0x0178, 0x00e4, 0x0062, 0x00d7, 0x04e1, 0x01ec, 0x04e5, + 0x014d, 0x001a, 0x001a, 0x04e9, 0x027e, 0x0056, 0x0024, 0x0024, + 0x0024, 0x0024, 0x0044, 0x005a, 0x005a, 0x0024, 0x0024, 0x0024, + 0x0214, 0x00e7, 0x01d1, 0x038c, 0x04ed, 0x04b7, 0x005a, 0x005a, + 0x005a, 0x005a, 0x005a, 0x005a, 0x005a, 0x0024, 0x004d, 0x0070, + 0x0072, 0x0024, 0x0024, 0x0024, 0x0072, 0x0024, 0x0024, 0x04f0, + 0x005a, 0x0024, 0x0024, 0x0024, 0x0024, 0x0214, 0x0062, 0x00b8, + 0x005a, 0x001a, 0x001a, 0x01a4, 0x005a, 0x00e7, 0x005a, 0x005a, + 0x005a, 0x005a, 0x005a, 0x005a, 0x005a, 0x00d1, 0x04f4, 0x00e8, + 0x010a, 0x010a, 0x04f6, 0x005a, 0x0110, 0x005a, 0x042b, 0x0155, + 0x00d2, 0x0062, 0x01a0, 0x0062, 0x01a0, 0x005a, 0x005a, 0x005a, + 0x005a, 0x005a, 0x04f8, 0x0214, 0x0062, 0x0191, 0x04fc, 0x017c, + 0x0157, 0x0500, 0x0503, 0x04a8, 0x005a, 0x016c, 0x005a, 0x005a, + 0x005a, 0x005a, 0x005a, 0x005a, 0x005a, 0x0024, 0x0024, 0x0024, + 0x0024, 0x0024, 0x017a, 0x0062, 0x0062, 0x00e4, 0x00e0, 0x01ea, + 0x03a0, 0x001a, 0x001a, 0x019c, 0x0133, 0x0058, 0x005a, 0x005a, + 0x005a, 0x005a, 0x005a, 0x005a, 0x005a, 0x0024, 0x0024, 0x0024, + 0x0024, 0x0214, 0x0062, 
0x0232, 0x0214, 0x01d1, 0x0024, 0x005a, + 0x005a, 0x001a, 0x001a, 0x01a4, 0x005a, 0x0024, 0x0024, 0x0024, + 0x0178, 0x00e4, 0x00cb, 0x017b, 0x0124, 0x0507, 0x050b, 0x027e, + 0x023a, 0x023a, 0x023a, 0x0024, 0x00cb, 0x0024, 0x0024, 0x0024, + 0x0024, 0x0214, 0x0062, 0x00d7, 0x0233, 0x050f, 0x0044, 0x005a, + 0x005a, 0x001a, 0x001a, 0x01a4, 0x005a, 0x0513, 0x0513, 0x0513, + 0x0051, 0x005a, 0x005a, 0x005a, 0x005a, 0x0024, 0x0024, 0x0075, + 0x0158, 0x0062, 0x0124, 0x0058, 0x005a, 0x001a, 0x001a, 0x01a4, + 0x005a, 0x001a, 0x001a, 0x001a, 0x001a, 0x005a, 0x005a, 0x005a, + 0x005a, 0x005a, 0x005a, 0x005a, 0x01d0, 0x00d2, 0x0124, 0x0062, + 0x005a, 0x001a, 0x001a, 0x01a4, 0x04b3, 0x0024, 0x0024, 0x0024, + 0x0214, 0x0062, 0x0062, 0x0241, 0x005a, 0x001a, 0x001a, 0x00b2, + 0x0024, 0x004d, 0x005a, 0x005a, 0x005b, 0x005a, 0x005a, 0x005a, + 0x005a, 0x017b, 0x015a, 0x0517, 0x051a, 0x051e, 0x04b3, 0x005a, + 0x005a, 0x005a, 0x005a, 0x005a, 0x005a, 0x0024, 0x0024, 0x0024, + 0x0024, 0x017a, 0x0062, 0x00d2, 0x017b, 0x024a, 0x0193, 0x005a, + 0x005a, 0x005a, 0x005a, 0x005a, 0x005a, 0x00c9, 0x0062, 0x0198, + 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0075, 0x0062, 0x0521, + 0x0524, 0x027e, 0x0528, 0x005a, 0x005a, 0x00c9, 0x00d7, 0x00e5, + 0x0024, 0x052c, 0x052e, 0x0062, 0x0062, 0x00d7, 0x01fb, 0x0511, + 0x0532, 0x005a, 0x005a, 0x005a, 0x0024, 0x0024, 0x0024, 0x0024, + 0x0513, 0x0513, 0x0536, 0x005a, 0x005a, 0x005a, 0x005a, 0x005a, + 0x0058, 0x005a, 0x005a, 0x005a, 0x001a, 0x001a, 0x01a4, 0x005a, + 0x0024, 0x0024, 0x0070, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, + 0x0178, 0x0062, 0x00b8, 0x0062, 0x0124, 0x027e, 0x053a, 0x005a, + 0x005a, 0x001a, 0x001a, 0x00b2, 0x0024, 0x0024, 0x0024, 0x0044, + 0x053e, 0x0024, 0x0024, 0x0024, 0x0024, 0x00d2, 0x0062, 0x0062, + 0x0062, 0x0542, 0x0062, 0x01d1, 0x012a, 0x005a, 0x005a, 0x0024, + 0x004d, 0x0072, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x00c9, + 0x00b8, 0x0105, 0x00b9, 0x0062, 0x0544, 0x005a, 0x005a, 0x001a, + 0x001a, 0x01a4, 0x005a, 0x0024, 0x0072, 
0x0070, 0x0024, 0x0024, + 0x0024, 0x0024, 0x0024, 0x0179, 0x017c, 0x0547, 0x0233, 0x0044, + 0x005a, 0x001a, 0x001a, 0x01a4, 0x005a, 0x005a, 0x005a, 0x005a, + 0x005a, 0x054b, 0x0435, 0x0386, 0x005a, 0x054e, 0x005a, 0x005a, + 0x005a, 0x005a, 0x005a, 0x005a, 0x005a, 0x00e4, 0x00b8, 0x0155, + 0x0552, 0x0226, 0x0227, 0x0227, 0x005a, 0x005a, 0x0105, 0x005a, + 0x005a, 0x005a, 0x005a, 0x0044, 0x005a, 0x005a, 0x005a, 0x0024, + 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0090, 0x0449, + 0x0024, 0x0024, 0x0024, 0x0058, 0x005a, 0x005a, 0x0237, 0x0024, + 0x0024, 0x0024, 0x004d, 0x023a, 0x0386, 0x005a, 0x005a, 0x0024, + 0x005a, 0x005a, 0x005a, 0x005a, 0x005a, 0x005a, 0x005a, 0x0024, + 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0556, 0x0559, 0x055b, + 0x036f, 0x02b5, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, + 0x055e, 0x0024, 0x0024, 0x0024, 0x0025, 0x0066, 0x0562, 0x0566, + 0x056a, 0x00a1, 0x0075, 0x0062, 0x0062, 0x0062, 0x00cb, 0x005a, + 0x005a, 0x0024, 0x0024, 0x0024, 0x036f, 0x0024, 0x0024, 0x0024, + 0x0024, 0x004d, 0x005a, 0x005a, 0x005a, 0x005a, 0x005a, 0x005a, + 0x005a, 0x00d2, 0x0062, 0x0062, 0x00e7, 0x00e5, 0x005a, 0x005a, + 0x005a, 0x005a, 0x001a, 0x001a, 0x01a4, 0x021f, 0x0024, 0x0024, + 0x0024, 0x0024, 0x0058, 0x0062, 0x04cd, 0x005a, 0x005a, 0x0024, + 0x0024, 0x0024, 0x0024, 0x0062, 0x0083, 0x01ed, 0x0024, 0x056e, + 0x005a, 0x005a, 0x001a, 0x001a, 0x0572, 0x0024, 0x0072, 0x0024, + 0x0024, 0x0024, 0x0024, 0x0024, 0x005a, 0x0056, 0x0575, 0x0575, + 0x0578, 0x01eb, 0x001a, 0x001a, 0x01a4, 0x005a, 0x0024, 0x0024, + 0x0024, 0x0024, 0x0024, 0x01ea, 0x04c9, 0x005a, 0x0024, 0x0024, + 0x004d, 0x00d1, 0x017a, 0x017b, 0x017b, 0x017b, 0x017b, 0x017b, + 0x017b, 0x017b, 0x017b, 0x005a, 0x00d1, 0x0198, 0x0024, 0x0024, + 0x0024, 0x057c, 0x0580, 0x005a, 0x005a, 0x0584, 0x005a, 0x005a, + 0x005a, 0x0313, 0x0313, 0x0313, 0x0313, 0x0313, 0x0588, 0x005a, + 0x005a, 0x005a, 0x005a, 0x005a, 0x005a, 0x005a, 0x058a, 0x03b2, + 0x03b2, 0x03f5, 0x005a, 0x005a, 0x005a, 0x005a, 0x005a, 
0x0313, + 0x058c, 0x0313, 0x0587, 0x03b3, 0x005a, 0x005a, 0x005a, 0x0590, + 0x005a, 0x005a, 0x005a, 0x005a, 0x0594, 0x0597, 0x005a, 0x005a, + 0x03fa, 0x005a, 0x005a, 0x03b2, 0x03b2, 0x03b2, 0x03b2, 0x0024, + 0x0024, 0x004d, 0x005a, 0x0024, 0x0024, 0x0024, 0x0044, 0x005a, + 0x0024, 0x0024, 0x0058, 0x059b, 0x0062, 0x005a, 0x005a, 0x005a, + 0x005a, 0x005a, 0x005a, 0x005a, 0x0024, 0x0024, 0x0024, 0x0024, + 0x001a, 0x001a, 0x01a4, 0x005a, 0x0062, 0x0062, 0x0062, 0x00cb, + 0x0062, 0x0062, 0x0062, 0x0062, 0x00b8, 0x005a, 0x005a, 0x0024, + 0x0024, 0x0024, 0x0024, 0x004d, 0x0056, 0x0024, 0x0024, 0x0024, + 0x0024, 0x0024, 0x00e3, 0x0078, 0x017a, 0x0214, 0x0062, 0x0062, + 0x0062, 0x0198, 0x00c9, 0x0062, 0x0024, 0x0024, 0x0024, 0x0024, + 0x0024, 0x0076, 0x0078, 0x0024, 0x0024, 0x0024, 0x0024, 0x0076, + 0x00bc, 0x005a, 0x005a, 0x005a, 0x005a, 0x005a, 0x005a, 0x0313, + 0x0313, 0x0313, 0x0313, 0x0313, 0x058d, 0x005a, 0x005a, 0x0313, + 0x0313, 0x0313, 0x0313, 0x0313, 0x059f, 0x0044, 0x005a, 0x0024, + 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0070, 0x005c, + 0x0071, 0x0056, 0x0070, 0x0024, 0x0024, 0x0072, 0x0056, 0x0024, + 0x0056, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0072, + 0x004d, 0x0056, 0x0024, 0x0070, 0x0024, 0x0070, 0x0024, 0x0130, + 0x006b, 0x0024, 0x0070, 0x0024, 0x0024, 0x0024, 0x0058, 0x0024, + 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0114, 0x001a, 0x001a, + 0x001a, 0x001a, 0x001a, 0x001a, 0x001a, 0x001a, 0x0062, 0x0062, + 0x0062, 0x0062, 0x0062, 0x0198, 0x0075, 0x0062, 0x0062, 0x0062, + 0x00a1, 0x0024, 0x00a0, 0x0024, 0x0024, 0x05a3, 0x03a0, 0x005a, + 0x005a, 0x005a, 0x00d1, 0x0062, 0x0082, 0x0062, 0x0062, 0x0062, + 0x005a, 0x005a, 0x005a, 0x005a, 0x0056, 0x004d, 0x005a, 0x005a, + 0x005a, 0x005a, 0x005a, 0x0062, 0x00b8, 0x0062, 0x0062, 0x0062, + 0x0062, 0x012c, 0x0062, 0x00b9, 0x0122, 0x00b8, 0x005a, 0x0024, + 0x0024, 0x0024, 0x0024, 0x0058, 0x005a, 0x005a, 0x005a, 0x005a, + 0x00d1, 0x005a, 0x005a, 0x005a, 0x005a, 0x0024, 0x0024, 0x0024, + 0x0044, 
0x0062, 0x0198, 0x0024, 0x0058, 0x001a, 0x001a, 0x01a4, + 0x006b, 0x005a, 0x005a, 0x005a, 0x005a, 0x0024, 0x0024, 0x0024, + 0x0120, 0x005a, 0x005a, 0x005a, 0x005a, 0x0024, 0x0024, 0x0024, + 0x0062, 0x001a, 0x001a, 0x01a4, 0x0194, 0x0024, 0x0024, 0x0024, + 0x0062, 0x001a, 0x001a, 0x01a4, 0x005a, 0x0024, 0x0024, 0x0024, + 0x0076, 0x05a7, 0x001a, 0x0218, 0x005b, 0x0024, 0x004d, 0x0024, + 0x0071, 0x0024, 0x0024, 0x0024, 0x004d, 0x0024, 0x00f7, 0x0024, + 0x0024, 0x0062, 0x00b8, 0x005a, 0x005a, 0x0024, 0x0062, 0x0198, + 0x005a, 0x001a, 0x001a, 0x01a4, 0x05ab, 0x005a, 0x005a, 0x005a, + 0x005a, 0x0056, 0x0024, 0x0024, 0x0024, 0x0449, 0x0449, 0x0044, + 0x005a, 0x005a, 0x0056, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, + 0x0024, 0x0024, 0x0071, 0x00f7, 0x0056, 0x0024, 0x004d, 0x0024, + 0x005d, 0x005a, 0x005c, 0x005b, 0x005d, 0x0056, 0x0071, 0x00f7, + 0x005d, 0x005d, 0x0071, 0x00f7, 0x004d, 0x0024, 0x004d, 0x0024, + 0x0056, 0x0130, 0x0024, 0x0024, 0x0072, 0x0024, 0x0024, 0x0024, + 0x0024, 0x005a, 0x0056, 0x0056, 0x0072, 0x0024, 0x0024, 0x0024, + 0x0024, 0x005a, 0x02e4, 0x05af, 0x02e4, 0x02e4, 0x02e4, 0x02e4, + 0x02e4, 0x02e4, 0x02e4, 0x02e4, 0x05b0, 0x02e4, 0x02e4, 0x02e4, + 0x02e4, 0x005a, 0x005a, 0x005a, 0x05b4, 0x005a, 0x005a, 0x005a, + 0x005a, 0x05b8, 0x005a, 0x005a, 0x005a, 0x005a, 0x006b, 0x02f9, + 0x05bc, 0x005a, 0x005a, 0x05be, 0x005a, 0x005a, 0x005a, 0x05c2, + 0x05c5, 0x05c6, 0x05c7, 0x005a, 0x005a, 0x005a, 0x05ca, 0x02e4, + 0x02e4, 0x02e4, 0x02e4, 0x05cc, 0x05ce, 0x05ce, 0x05ce, 0x05ce, + 0x05ce, 0x05ce, 0x05d2, 0x02e4, 0x02e4, 0x02e4, 0x03b2, 0x03b2, + 0x03ff, 0x03b2, 0x03b2, 0x03b2, 0x03fe, 0x05d6, 0x05d8, 0x05d9, + 0x02e4, 0x03b2, 0x03b2, 0x05dc, 0x02e4, 0x031b, 0x02e4, 0x02e4, + 0x02e4, 0x05d8, 0x031b, 0x02e4, 0x02e4, 0x02e4, 0x02e4, 0x02e4, + 0x02e4, 0x05d8, 0x05d8, 0x05d8, 0x05d8, 0x05d8, 0x05d8, 0x05d8, + 0x05d8, 0x05af, 0x02e4, 0x02e4, 0x05df, 0x05d8, 0x0335, 0x05d8, + 0x05d8, 0x05d8, 0x05d8, 0x05d8, 0x05d8, 0x05d8, 0x05e2, 0x05d8, + 0x05d8, 0x05d8, 0x05d8, 
0x05d8, 0x02e4, 0x02e4, 0x05e6, 0x05d8, + 0x05d8, 0x05d8, 0x05d8, 0x05d8, 0x05ea, 0x05d8, 0x05ec, 0x05d8, + 0x05d8, 0x05e0, 0x05b0, 0x05d8, 0x02e4, 0x02e4, 0x02e4, 0x05d8, + 0x05d8, 0x05d8, 0x05d8, 0x05af, 0x05af, 0x05ed, 0x05f0, 0x05d8, + 0x05d8, 0x05d8, 0x05d8, 0x05d8, 0x05d8, 0x05d8, 0x05e0, 0x05e2, + 0x05d8, 0x05d8, 0x05d8, 0x05d8, 0x05d8, 0x05d8, 0x05d8, 0x05f4, + 0x05ec, 0x05d8, 0x05f7, 0x05ea, 0x05d8, 0x05d8, 0x05d8, 0x05d8, + 0x05d8, 0x05d8, 0x05d8, 0x02cb, 0x02fd, 0x05fa, 0x05d8, 0x05d8, + 0x05d8, 0x05f7, 0x02fd, 0x02fd, 0x05ec, 0x05d8, 0x05d8, 0x05f8, + 0x02fd, 0x02fd, 0x05fe, 0x0024, 0x02a7, 0x0602, 0x05e0, 0x05d8, + 0x05d8, 0x05d8, 0x05d8, 0x02e4, 0x02e4, 0x02e4, 0x02e4, 0x0606, + 0x02e4, 0x02e4, 0x02e4, 0x02e4, 0x02e4, 0x0334, 0x02e4, 0x02e4, + 0x02e4, 0x02e4, 0x02e4, 0x02f9, 0x02f9, 0x02e4, 0x02e4, 0x02e4, + 0x02e4, 0x02e4, 0x02f9, 0x0602, 0x05d8, 0x05d8, 0x05d8, 0x05d8, + 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x035f, 0x060a, 0x0024, + 0x05d8, 0x031b, 0x02e4, 0x05af, 0x05e0, 0x05df, 0x02e4, 0x05d8, + 0x02e4, 0x02e4, 0x05b0, 0x05af, 0x02e4, 0x05d8, 0x05d8, 0x05af, + 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x02e4, 0x02e4, 0x02e4, + 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x02e3, 0x02e4, 0x02e4, + 0x05d8, 0x05d8, 0x05d8, 0x02e4, 0x05af, 0x02e4, 0x02e4, 0x02e4, + 0x0024, 0x0024, 0x0024, 0x02a3, 0x0024, 0x0024, 0x0024, 0x0024, + 0x02a3, 0x02a3, 0x0024, 0x0024, 0x02a1, 0x02a3, 0x0024, 0x0024, + 0x02a3, 0x02a3, 0x0024, 0x0024, 0x0024, 0x0024, 0x02a1, 0x02f9, + 0x02f9, 0x02f9, 0x02a3, 0x02ed, 0x02a3, 0x02a3, 0x02a3, 0x02a3, + 0x02a3, 0x02a3, 0x02a3, 0x02a3, 0x0024, 0x0024, 0x0024, 0x05d8, + 0x05d8, 0x05d8, 0x05d8, 0x05d8, 0x05d8, 0x060e, 0x05d8, 0x060f, + 0x05d8, 0x05d8, 0x05d8, 0x05d8, 0x05d8, 0x05d8, 0x02f9, 0x02f9, + 0x02f9, 0x02f9, 0x02f9, 0x02f9, 0x02f9, 0x02f9, 0x02e4, 0x02e4, + 0x02e4, 0x02e4, 0x05d8, 0x05d8, 0x05d8, 0x05af, 0x05d8, 0x05d8, + 0x031b, 0x05b0, 0x05d8, 0x05d8, 0x05d8, 0x05d8, 0x05e0, 0x02e4, + 0x0344, 0x05d8, 0x05d8, 0x05d8, 0x02cb, 
0x05d8, 0x05d8, 0x031b, + 0x02e4, 0x05d8, 0x05d8, 0x05af, 0x02e4, 0x02e4, 0x02e4, 0x02e4, + 0x02e4, 0x02e4, 0x02e4, 0x0613, 0x03b2, 0x03b2, 0x03b2, 0x03b2, + 0x03b2, 0x03b2, 0x03b2, 0x03b7, 0x0617, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0062, 0x0062, 0x0062, 0x0062, + 0x0000, 0x0000, 0x0000, 0x0000, +]; +#[rustfmt::skip] +pub const STAGE3: [u16; 1563] = [ + 0x001d, 0x001d, 0x001d, 0x001d, + 0x015d, 0x001f, 0x001d, 0x001e, + 0x001d, 0x001d, 0x0110, 0x02d0, + 0x0410, 0x0590, 0x0510, 0x04d0, + 0x0590, 0x0410, 0x03d0, 0x0290, + 0x0590, 0x0510, 0x0450, 0x01d0, + 0x0450, 0x0550, 0x0490, 0x0490, + 0x0490, 0x0490, 0x0450, 0x0450, + 0x0590, 0x0590, 0x0590, 0x02d0, + 0x0590, 0x0590, 0x0590, 0x0590, + 0x03d0, 0x0510, 0x0290, 0x0590, + 0x0590, 0x0150, 0x0210, 0x0590, + 0x001d, 0x00d0, 0x03d0, 0x04d0, + 0x0510, 0x0510, 0x0590, 0x0010, + 0x059b, 0x0010, 0x0410, 0x0590, + 0x0151, 0x059b, 0x0590, 0x04d0, + 0x0510, 0x0010, 0x0010, 0x0190, + 0x0590, 0x0010, 0x0010, 0x0010, + 0x0410, 0x0010, 0x0010, 0x0010, + 0x03d0, 0x0590, 0x0590, 0x0590, + 0x0010, 0x0190, 0x0010, 0x0010, + 0x0010, 0x0190, 0x0010, 0x0590, + 0x0590, 0x0590, 0x0010, 0x0010, + 0x0010, 0x0010, 0x0590, 0x0010, + 0x0590, 0x0190, 0x0001, 0x0001, + 0x0001, 0x0001, 0x00c1, 0x00c1, + 0x00c1, 0x00c1, 0x0001, 0x0010, + 0x0010, 0x0590, 0x0590, 0x0450, + 0x0590, 0x0010, 0x0590, 0x0590, + 0x0010, 0x0590, 0x0590, 0x0590, + 0x0001, 0x0001, 0x0590, 0x0590, + 0x0450, 0x0150, 0x0010, 0x0590, + 0x0590, 0x0510, 0x0010, 0x0001, + 0x0001, 0x0001, 0x0150, 0x0001, + 0x0001, 0x02d0, 0x0001, 0x0483, + 0x0483, 0x0483, 0x0483, 0x0590, + 0x0590, 0x04d0, 0x04d0, 0x04d0, + 0x0001, 0x0001, 0x0001, 0x02d0, + 0x02d0, 0x02d0, 0x0490, 0x0490, + 0x04d0, 0x0490, 0x0590, 0x0590, + 0x0590, 0x0001, 0x0590, 0x0590, + 0x0590, 0x02d0, 0x0590, 0x0001, + 0x0001, 0x0483, 0x0590, 0x0001, + 0x0590, 0x0590, 0x0001, 0x0590, + 0x0001, 0x0001, 0x0490, 0x0490, + 0x0590, 0x0590, 0x0010, 0x0583, + 0x0001, 0x0001, 0x0001, 0x0010, + 0x0001, 0x0590, 
0x0010, 0x0010, + 0x0450, 0x02d0, 0x0590, 0x0010, + 0x0001, 0x0510, 0x0510, 0x0001, + 0x0001, 0x0590, 0x0001, 0x0001, + 0x0001, 0x0010, 0x0010, 0x0483, + 0x0483, 0x0010, 0x0010, 0x0010, + 0x0001, 0x0001, 0x0483, 0x0001, + 0x0001, 0x0001, 0x0011, 0x0590, + 0x059a, 0x059a, 0x059a, 0x059a, + 0x0001, 0x0011, 0x0001, 0x0590, + 0x0011, 0x0011, 0x0001, 0x0001, + 0x0001, 0x0011, 0x0011, 0x0011, + 0x0009, 0x0011, 0x0011, 0x0150, + 0x0150, 0x0490, 0x0490, 0x0590, + 0x0001, 0x0011, 0x0011, 0x0590, + 0x0010, 0x0010, 0x0590, 0x059a, + 0x0010, 0x059a, 0x059a, 0x0010, + 0x059a, 0x0010, 0x0010, 0x059a, + 0x059a, 0x0010, 0x0010, 0x0001, + 0x0010, 0x0010, 0x0011, 0x0010, + 0x0010, 0x0011, 0x0009, 0x0590, + 0x0010, 0x0010, 0x0010, 0x0011, + 0x0010, 0x0010, 0x0490, 0x0490, + 0x059a, 0x059a, 0x04d0, 0x04d0, + 0x0590, 0x04d0, 0x0590, 0x0510, + 0x0590, 0x0590, 0x0001, 0x0010, + 0x0001, 0x0001, 0x0011, 0x0001, + 0x0010, 0x0011, 0x0011, 0x0001, + 0x0001, 0x0010, 0x0010, 0x0001, + 0x0590, 0x0010, 0x0590, 0x0010, + 0x0590, 0x0001, 0x0590, 0x0010, + 0x059a, 0x059a, 0x059a, 0x0001, + 0x0011, 0x0010, 0x0011, 0x0009, + 0x0010, 0x0010, 0x0590, 0x0510, + 0x0010, 0x0010, 0x059a, 0x0001, + 0x0001, 0x0010, 0x0001, 0x0011, + 0x0011, 0x0001, 0x0590, 0x0011, + 0x0001, 0x0590, 0x059a, 0x0590, + 0x0590, 0x0010, 0x0010, 0x0011, + 0x0011, 0x0001, 0x0011, 0x0011, + 0x0010, 0x0011, 0x0011, 0x0001, + 0x0010, 0x0010, 0x0590, 0x0510, + 0x0590, 0x0010, 0x0011, 0x0010, + 0x0001, 0x0001, 0x0009, 0x0010, + 0x0010, 0x0001, 0x0001, 0x0010, + 0x059a, 0x059a, 0x059a, 0x0010, + 0x0590, 0x0010, 0x0010, 0x0190, + 0x0590, 0x0590, 0x0590, 0x0011, + 0x0011, 0x0011, 0x0011, 0x0010, + 0x0001, 0x0011, 0x0010, 0x0011, + 0x0011, 0x0010, 0x0590, 0x0590, + 0x0011, 0x0009, 0x0593, 0x0590, + 0x04d0, 0x0590, 0x0590, 0x0001, + 0x0010, 0x0001, 0x0010, 0x0011, + 0x0010, 0x0010, 0x0010, 0x0510, + 0x0001, 0x0001, 0x0001, 0x0590, + 0x0490, 0x0490, 0x0150, 0x0150, + 0x0001, 0x0010, 0x0010, 0x0010, + 0x0490, 0x0490, 0x0010, 0x0010, + 0x0590, 
0x0190, 0x0190, 0x0190, + 0x0590, 0x0190, 0x0190, 0x00d0, + 0x0190, 0x0190, 0x0150, 0x00d0, + 0x02d0, 0x02d0, 0x02d0, 0x00d0, + 0x0590, 0x0150, 0x0001, 0x0590, + 0x0001, 0x03d0, 0x0210, 0x0011, + 0x0011, 0x0001, 0x0001, 0x0001, + 0x0151, 0x0001, 0x0010, 0x0150, + 0x0150, 0x0190, 0x0190, 0x0150, + 0x0190, 0x0590, 0x00d0, 0x00d0, + 0x0010, 0x0001, 0x0011, 0x0001, + 0x0001, 0x0024, 0x0024, 0x0024, + 0x0024, 0x0015, 0x0015, 0x0015, + 0x0015, 0x0016, 0x0016, 0x0016, + 0x0016, 0x0590, 0x0150, 0x0590, + 0x0590, 0x0590, 0x0210, 0x0010, + 0x0010, 0x0010, 0x0590, 0x0590, + 0x0590, 0x0150, 0x0150, 0x0590, + 0x0590, 0x0001, 0x0011, 0x0010, + 0x0010, 0x0011, 0x0150, 0x0150, + 0x0010, 0x0590, 0x0010, 0x0001, + 0x0001, 0x0150, 0x0150, 0x0350, + 0x0010, 0x0150, 0x0590, 0x0150, + 0x0510, 0x0590, 0x0590, 0x02d0, + 0x02d0, 0x0150, 0x0150, 0x0190, + 0x0590, 0x02d0, 0x02d0, 0x0590, + 0x0001, 0x0001, 0x00c1, 0x0001, + 0x0011, 0x0011, 0x0011, 0x0001, + 0x0490, 0x0490, 0x0490, 0x0010, + 0x0011, 0x0001, 0x0011, 0x0010, + 0x0010, 0x0150, 0x0150, 0x05d0, + 0x0150, 0x0150, 0x0150, 0x05d0, + 0x05d0, 0x05d0, 0x05d0, 0x0001, + 0x0590, 0x0011, 0x0001, 0x0001, + 0x0010, 0x0010, 0x0001, 0x0011, + 0x0001, 0x0011, 0x0001, 0x0010, + 0x0010, 0x0010, 0x0150, 0x0150, + 0x0150, 0x0150, 0x0001, 0x0590, + 0x0590, 0x0011, 0x0001, 0x0001, + 0x0590, 0x0010, 0x0001, 0x00c1, + 0x0001, 0x0001, 0x0001, 0x0590, + 0x0190, 0x0590, 0x0010, 0x0150, + 0x0150, 0x0150, 0x00d0, 0x0150, + 0x0150, 0x0150, 0x0081, 0x0001, + 0x000c, 0x0001, 0x0001, 0x0410, + 0x0410, 0x03d0, 0x0410, 0x0310, + 0x0310, 0x0310, 0x0150, 0x001d, + 0x001d, 0x0001, 0x0001, 0x0001, + 0x00d0, 0x04d0, 0x04d0, 0x04d0, + 0x04d0, 0x0590, 0x0410, 0x0410, + 0x0010, 0x035b, 0x0350, 0x0590, + 0x0590, 0x0450, 0x03d0, 0x0210, + 0x0350, 0x035b, 0x0590, 0x0590, + 0x0150, 0x04d0, 0x0590, 0x0150, + 0x0150, 0x0150, 0x0041, 0x0581, + 0x0581, 0x0581, 0x001d, 0x0001, + 0x0001, 0x0590, 0x03d0, 0x0210, + 0x0010, 0x0510, 0x0510, 0x0510, + 0x0510, 0x04d0, 0x0510, 0x0510, + 
0x0510, 0x0590, 0x0590, 0x0590, + 0x04d0, 0x0590, 0x0590, 0x0510, + 0x0590, 0x0010, 0x001b, 0x0590, + 0x059b, 0x0590, 0x0590, 0x001b, + 0x001b, 0x001b, 0x001b, 0x0590, + 0x0590, 0x059b, 0x059b, 0x0590, + 0x0010, 0x0510, 0x0510, 0x0590, + 0x0590, 0x0590, 0x0310, 0x03d0, + 0x0210, 0x03d0, 0x0210, 0x0590, + 0x0590, 0x05eb, 0x05eb, 0x059b, + 0x03a0, 0x0220, 0x0590, 0x059b, + 0x0590, 0x0590, 0x0590, 0x059b, + 0x0590, 0x05ab, 0x05ab, 0x05ab, + 0x059b, 0x059b, 0x059b, 0x05eb, + 0x05db, 0x05db, 0x05eb, 0x059b, + 0x059b, 0x059b, 0x0590, 0x0010, + 0x0010, 0x001b, 0x0010, 0x0010, + 0x059b, 0x059b, 0x0590, 0x0590, + 0x001b, 0x0010, 0x0590, 0x0590, + 0x059b, 0x05ab, 0x05ab, 0x0590, + 0x05db, 0x05db, 0x05db, 0x05db, + 0x059b, 0x001b, 0x0010, 0x059b, + 0x001b, 0x059b, 0x059b, 0x001b, + 0x001b, 0x05eb, 0x05eb, 0x001b, + 0x001b, 0x05db, 0x059b, 0x05db, + 0x05db, 0x059b, 0x059b, 0x059b, + 0x059b, 0x05ab, 0x05ab, 0x05ab, + 0x05ab, 0x059b, 0x05db, 0x05db, + 0x05db, 0x001b, 0x059b, 0x001b, + 0x059b, 0x001b, 0x001b, 0x059b, + 0x001b, 0x05db, 0x001b, 0x001b, + 0x059b, 0x0590, 0x0590, 0x05a0, + 0x05a0, 0x05a0, 0x05a0, 0x059b, + 0x05ab, 0x059b, 0x059b, 0x05eb, + 0x05eb, 0x05db, 0x05db, 0x001b, + 0x001b, 0x001b, 0x05db, 0x05ab, + 0x05db, 0x05db, 0x001b, 0x05db, + 0x05eb, 0x001b, 0x001b, 0x001b, + 0x05db, 0x001b, 0x001b, 0x05db, + 0x001b, 0x001b, 0x05eb, 0x001b, + 0x05db, 0x05eb, 0x05eb, 0x05db, + 0x05eb, 0x001b, 0x05db, 0x05db, + 0x05eb, 0x001b, 0x05eb, 0x05db, + 0x05db, 0x05ab, 0x0590, 0x0590, + 0x05db, 0x05db, 0x05eb, 0x05eb, + 0x059b, 0x0590, 0x059b, 0x0590, + 0x05ab, 0x0590, 0x0590, 0x0590, + 0x059b, 0x0590, 0x0590, 0x059b, + 0x05ab, 0x0590, 0x05ab, 0x0590, + 0x0590, 0x0590, 0x05ab, 0x05ab, + 0x0590, 0x002b, 0x0590, 0x0590, + 0x0590, 0x0410, 0x0410, 0x0410, + 0x0410, 0x0590, 0x02d0, 0x02db, + 0x03d0, 0x0210, 0x0010, 0x0010, + 0x0590, 0x03d0, 0x0210, 0x0590, + 0x0590, 0x03d0, 0x0210, 0x03d0, + 0x0210, 0x03d0, 0x0210, 0x0590, + 0x0590, 0x0590, 0x059b, 0x059b, + 0x059b, 0x0590, 0x002b, 
0x0010, + 0x0010, 0x02d0, 0x0150, 0x0150, + 0x0590, 0x02d0, 0x0150, 0x0010, + 0x0010, 0x0010, 0x0410, 0x0410, + 0x0150, 0x0150, 0x0590, 0x0150, + 0x03d0, 0x0150, 0x0590, 0x0590, + 0x0410, 0x0410, 0x0590, 0x0590, + 0x0410, 0x0410, 0x03d0, 0x0210, + 0x0150, 0x0150, 0x02d0, 0x0590, + 0x0150, 0x0150, 0x0150, 0x0590, + 0x0150, 0x0150, 0x03d0, 0x0150, + 0x02d0, 0x03d0, 0x0290, 0x03d0, + 0x0290, 0x03d0, 0x0290, 0x0150, + 0x0010, 0x0010, 0x05e0, 0x05e0, + 0x05e0, 0x05e0, 0x0010, 0x05e0, + 0x05e0, 0x0010, 0x0010, 0x0160, + 0x0220, 0x0220, 0x05e0, 0x0360, + 0x05e0, 0x05e0, 0x03a0, 0x0220, + 0x03a0, 0x0220, 0x05e0, 0x05e0, + 0x0360, 0x03a0, 0x0220, 0x0220, + 0x05e0, 0x05e0, 0x0001, 0x0001, + 0x0021, 0x0021, 0x05eb, 0x05e0, + 0x05e0, 0x05e0, 0x0020, 0x05e0, + 0x05e0, 0x05e0, 0x0360, 0x05eb, + 0x05e0, 0x05d0, 0x0010, 0x0020, + 0x05e0, 0x0020, 0x05e0, 0x0020, + 0x0020, 0x0010, 0x0001, 0x0001, + 0x0360, 0x0360, 0x0360, 0x05e0, + 0x0360, 0x0020, 0x05e0, 0x0020, + 0x0020, 0x05e0, 0x0020, 0x0360, + 0x0360, 0x05e0, 0x0010, 0x0010, + 0x0010, 0x05e0, 0x0020, 0x0020, + 0x0020, 0x0020, 0x05e0, 0x05e0, + 0x05e0, 0x05eb, 0x05e0, 0x05e0, + 0x0590, 0x0150, 0x02d0, 0x0150, + 0x0001, 0x0001, 0x0590, 0x0150, + 0x0011, 0x0001, 0x0001, 0x0011, + 0x04d0, 0x0590, 0x0010, 0x0010, + 0x0190, 0x0190, 0x02d0, 0x02d0, + 0x0011, 0x0011, 0x0590, 0x0590, + 0x0190, 0x0590, 0x0590, 0x0001, + 0x0024, 0x0010, 0x0010, 0x0010, + 0x0011, 0x05d0, 0x05d0, 0x05d0, + 0x0150, 0x05d0, 0x05d0, 0x0010, + 0x0150, 0x0010, 0x0010, 0x05d0, + 0x05d0, 0x0590, 0x0011, 0x0001, + 0x0010, 0x0001, 0x0011, 0x0011, + 0x0150, 0x0027, 0x0028, 0x0028, + 0x0028, 0x0028, 0x0015, 0x0015, + 0x0015, 0x0010, 0x0010, 0x0010, + 0x0016, 0x0590, 0x0590, 0x0210, + 0x03d0, 0x04d0, 0x0590, 0x0590, + 0x0590, 0x0001, 0x0001, 0x0001, + 0x0021, 0x0220, 0x0220, 0x0220, + 0x0360, 0x02e0, 0x02e0, 0x03a0, + 0x0220, 0x0320, 0x0010, 0x0010, + 0x00c1, 0x0001, 0x00c1, 0x0001, + 0x00c1, 0x00c1, 0x0001, 0x0220, + 0x03a0, 0x0220, 0x03a0, 0x0220, + 0x05e0, 0x05e0, 
0x03a0, 0x0220, + 0x05e0, 0x05e0, 0x05e0, 0x0220, + 0x05e0, 0x0220, 0x0010, 0x0360, + 0x0360, 0x02e0, 0x02e0, 0x05e0, + 0x0520, 0x04e0, 0x05e0, 0x0590, + 0x0010, 0x0010, 0x0041, 0x0010, + 0x02e0, 0x05e0, 0x05e0, 0x0520, + 0x04e0, 0x05e0, 0x05e0, 0x0220, + 0x05e0, 0x0220, 0x05e0, 0x05e0, + 0x0360, 0x0360, 0x05e0, 0x05e0, + 0x05e0, 0x02e0, 0x05e0, 0x05e0, + 0x05e0, 0x03a0, 0x05e0, 0x0220, + 0x05e0, 0x03a0, 0x0220, 0x0210, + 0x0390, 0x0210, 0x0350, 0x05d0, + 0x0010, 0x05d0, 0x05d0, 0x05d0, + 0x0351, 0x0351, 0x05d0, 0x05d0, + 0x05d0, 0x0010, 0x0010, 0x0010, + 0x04e0, 0x0520, 0x05e0, 0x05e0, + 0x0520, 0x0520, 0x0010, 0x0150, + 0x0150, 0x0150, 0x0010, 0x0590, + 0x0001, 0x0010, 0x0010, 0x0590, + 0x0590, 0x0010, 0x0150, 0x0590, + 0x0001, 0x0001, 0x0010, 0x0150, + 0x0150, 0x0310, 0x0010, 0x0001, + 0x0001, 0x0150, 0x0590, 0x0590, + 0x0010, 0x0001, 0x0150, 0x0010, + 0x0010, 0x0010, 0x00c1, 0x0590, + 0x0483, 0x0150, 0x0150, 0x0010, + 0x0483, 0x0010, 0x0010, 0x0001, + 0x0010, 0x0490, 0x0490, 0x0590, + 0x0011, 0x0011, 0x0590, 0x0593, + 0x0593, 0x0150, 0x0001, 0x0001, + 0x0001, 0x0490, 0x0490, 0x0590, + 0x0190, 0x0150, 0x0590, 0x0001, + 0x0590, 0x0150, 0x0010, 0x0010, + 0x0001, 0x0150, 0x0011, 0x0011, + 0x0010, 0x0010, 0x0010, 0x05d0, + 0x0010, 0x0011, 0x0010, 0x0011, + 0x0001, 0x0013, 0x0001, 0x05d0, + 0x05d0, 0x0010, 0x05d0, 0x0001, + 0x0190, 0x0150, 0x0150, 0x02d0, + 0x02d0, 0x0590, 0x0590, 0x0001, + 0x0150, 0x0150, 0x0590, 0x0190, + 0x0190, 0x0190, 0x0190, 0x0011, + 0x0010, 0x0010, 0x0001, 0x0011, + 0x0001, 0x0013, 0x0011, 0x0013, + 0x0011, 0x0001, 0x0011, 0x0593, + 0x0001, 0x0001, 0x0001, 0x0190, + 0x0150, 0x0190, 0x0590, 0x0001, + 0x0593, 0x0593, 0x0593, 0x0593, + 0x0001, 0x0001, 0x0190, 0x0150, + 0x0150, 0x0010, 0x0190, 0x0190, + 0x0010, 0x0010, 0x0150, 0x0150, + 0x0010, 0x0010, 0x0190, 0x02d0, + 0x0590, 0x0590, 0x0010, 0x0011, + 0x0001, 0x0001, 0x0593, 0x0001, + 0x0001, 0x0010, 0x0011, 0x0010, + 0x0010, 0x0150, 0x0001, 0x0001, + 0x0013, 0x0011, 0x0001, 0x0011, + 0x0001, 
0x0150, 0x03d0, 0x03d0, + 0x03d0, 0x0210, 0x0210, 0x0590, + 0x0590, 0x0210, 0x0590, 0x03d0, + 0x0210, 0x0210, 0x00c1, 0x00c1, + 0x00c1, 0x03c1, 0x0201, 0x00c1, + 0x00c1, 0x00c1, 0x03c1, 0x0201, + 0x03c1, 0x0201, 0x0150, 0x0590, + 0x0010, 0x0010, 0x0490, 0x0490, + 0x0010, 0x0590, 0x0590, 0x0590, + 0x0595, 0x0595, 0x0595, 0x0590, + 0x0360, 0x0360, 0x0360, 0x0360, + 0x00c1, 0x0010, 0x0010, 0x0010, + 0x0021, 0x0021, 0x0010, 0x0010, + 0x05a0, 0x05a0, 0x0010, 0x0010, + 0x0010, 0x05a0, 0x05a0, 0x05a0, + 0x0010, 0x0010, 0x0020, 0x0010, + 0x0020, 0x0020, 0x0020, 0x0010, + 0x0020, 0x0010, 0x0010, 0x0590, + 0x0001, 0x0001, 0x0150, 0x05a0, + 0x05a0, 0x05a0, 0x0590, 0x0001, + 0x0590, 0x0590, 0x0150, 0x0590, + 0x0490, 0x0490, 0x0490, 0x0010, + 0x0010, 0x03d0, 0x03d0, 0x05eb, + 0x05db, 0x05db, 0x05db, 0x05eb, + 0x0010, 0x059b, 0x059b, 0x059b, + 0x0010, 0x0010, 0x0590, 0x059b, + 0x001b, 0x001b, 0x0010, 0x0010, + 0x001b, 0x001b, 0x0010, 0x0010, + 0x002b, 0x0010, 0x002b, 0x002b, + 0x002b, 0x002b, 0x0010, 0x059b, + 0x05db, 0x05db, 0x0012, 0x0012, + 0x0012, 0x0012, 0x05e0, 0x05eb, + 0x05eb, 0x05db, 0x05e0, 0x05e0, + 0x05eb, 0x05eb, 0x05eb, 0x05eb, + 0x05e0, 0x05db, 0x05db, 0x05db, + 0x05eb, 0x05eb, 0x05eb, 0x05db, + 0x05eb, 0x05eb, 0x059b, 0x059b, + 0x05db, 0x05db, 0x05eb, 0x05ab, + 0x05ab, 0x05eb, 0x05eb, 0x05eb, + 0x05e1, 0x05e1, 0x05e1, 0x05e1, + 0x05ab, 0x05eb, 0x05ab, 0x05eb, + 0x05eb, 0x05eb, 0x05ab, 0x05ab, + 0x05ab, 0x05eb, 0x05ab, 0x05ab, + 0x0590, 0x0590, 0x059b, 0x059b, + 0x05db, 0x05eb, 0x05db, 0x05db, + 0x05eb, 0x05db, 0x0410, 0x0350, + 0x0350, 0x0350, 0x05eb, 0x05eb, + 0x05eb, 0x05d0, 0x05eb, 0x05db, + 0x05db, 0x0010, 0x0010, 0x001d, + 0x0001, 0x001d, 0x001d, +]; +#[rustfmt::skip] +pub const GRAPHEME_JOIN_RULES: [[u32; 16]; 2] = [ + [ + 0b11111100111100111111111111110011, + 0b11111100111100111111111111110011, + 0b11111100111100111111111111010011, + 0b11111100000000000000000000000000, + 0b11111100111100000011000011110011, + 0b11111100111100111100001111110011, + 
0b11111100111100111100111111110011, + 0b11111100111100111100001111110011, + 0b11111100111100111100111111110011, + 0b11111100110000111111111111110011, + 0b11111100111100111111111111110011, + 0b11111100111100111111111111110011, + 0b11111100001100111111111111110011, + 0b11111111111111111111111111111111, + 0b00111111111111111111111111111111, + 0b11111111111111111111111111111111, + ], + [ + 0b00111100111111111111110011111111, + 0b11111111111111111111111111001111, + 0b11111111111111111111111111111111, + 0b11111111111111111111111111111111, + 0b00111100111111111111110011111111, + 0b00111100111111111111110011111111, + 0b00000000000000000000000011111100, + 0b00111100000011000011110011111111, + 0b00111100111100001111110011111111, + 0b00111100111100111111110011111111, + 0b00111100111100001111110011111111, + 0b00111100111100111111110011111111, + 0b00110000111111111111110011111111, + 0b00111100111111111111110011111111, + 0b00111100111111111111110011111111, + 0b00001100111111111111110011111111, + ], +]; +#[rustfmt::skip] +pub const LINE_BREAK_JOIN_RULES: [u32; 24] = [ + 0b00000000001000110011111110101010, + 0b00000000111111111111111111111111, + 0b00000000000000000000000000000000, + 0b00000000111111111111111111111111, + 0b00000000001000010011111110100010, + 0b00000000001000110011111110100010, + 0b00000000111111111111111111111111, + 0b00000000001001110011111110100010, + 0b00000000001110110011111110101010, + 0b00000000001110110011111110101010, + 0b00000000011111110011111110101010, + 0b00000000001000110011111110101010, + 0b00000000001000110011111110101010, + 0b00000000001000110011111110101010, + 0b00000000111111111111111111111111, + 0b00000000111111111111111111111111, + 0b00000000111111111111111111111111, + 0b00000000011111110011111110101010, + 0b00000000011111111011111110101010, + 0b00000000011001111111111110101010, + 0b00000000111001111111111110101010, + 0b00000000001111110011111110101010, + 0b00000000011111111011111110101010, + 0b00000000001010110011111110101010, +]; 
+#[inline(always)]
+pub fn ucd_grapheme_cluster_lookup(cp: char) -> usize {
+    let cp = cp as usize;
+    let s = STAGE0[cp >> 11] as usize;
+    let s = STAGE1[s + ((cp >> 5) & 63)] as usize;
+    let s = STAGE2[s + ((cp >> 2) & 7)] as usize;
+    STAGE3[s + (cp & 3)] as usize
+}
+#[inline(always)]
+pub fn ucd_grapheme_cluster_joins(state: u32, lead: usize, trail: usize) -> u32 {
+    let l = lead & 15;
+    let t = trail & 15;
+    (GRAPHEME_JOIN_RULES[state as usize][l] >> (t * 2)) & 3
+}
+#[inline(always)]
+pub fn ucd_grapheme_cluster_joins_done(state: u32) -> bool {
+    state == 3
+}
+#[inline(always)]
+pub fn ucd_grapheme_cluster_character_width(val: usize) -> usize {
+    (val >> 4) & 3
+}
+#[inline(always)]
+pub fn ucd_grapheme_cluster_is_newline(val: usize) -> bool {
+    (val & 15) > 13
+}
+#[inline(always)]
+pub fn ucd_line_break_joins(lead: usize, trail: usize) -> bool {
+    let l = lead >> 6;
+    let t = trail >> 6;
+    ((LINE_BREAK_JOIN_RULES[l] >> t) & 1) != 0
+}
+// END: Generated by grapheme-table-gen
diff --git a/src/utf8.rs b/src/utf8.rs
new file mode 100644
index 0000000..9640206
--- /dev/null
+++ b/src/utf8.rs
@@ -0,0 +1,240 @@
+use crate::helpers;
+use std::{hint, iter};
+
+/// A lossy UTF-8 decoder that iterates over the `char`s of a byte slice.
+///
+/// Invalid or truncated sequences yield U+FFFD (the replacement character)
+/// instead of an error, so any byte slice can be iterated.
+#[derive(Clone, Copy)]
+pub struct Utf8Chars<'a> {
+    source: &'a [u8],
+    offset: usize,
+}
+
+impl<'a> Utf8Chars<'a> {
+    /// Creates a decoder over `source` that starts reading at byte `offset`.
+    pub fn new(source: &'a [u8], offset: usize) -> Self {
+        Self { source, offset }
+    }
+
+    /// Returns the byte offset of the next byte to be decoded.
+    pub fn offset(&self) -> usize {
+        self.offset
+    }
+
+    /// Moves the decoder to the given byte `offset`.
+    pub fn seek(&mut self, offset: usize) {
+        self.offset = offset;
+    }
+
+    /// Returns the replacement character; used by all error paths of `next()`.
+    #[inline(always)]
+    fn fffd() -> Option<char> {
+        // Improves performance by ~5% and reduces code size.
+        helpers::cold_path();
+        Some('\u{FFFD}')
+    }
+}
+
+impl Iterator for Utf8Chars<'_> {
+    type Item = char;
+
+    /// Decodes the next character, or returns `None` at the end of the input.
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.offset >= self.source.len() {
+            return None;
+        }
+
+        let c = self.source[self.offset];
+        self.offset += 1;
+
+        // See: https://datatracker.ietf.org/doc/html/rfc3629
+        // as well as ICU's `utf8.h` for the bitmask approach.
+
+        // UTF8-1 = %x00-7F
+        if (c & 0x80) == 0 {
+            return Some(c as char);
+        }
+
+        if self.offset >= self.source.len() {
+            return Self::fffd();
+        }
+
+        let mut cp = c as u32;
+
+        if cp < 0xE0 {
+            // UTF8-2 = %xC2-DF UTF8-tail
+
+            if cp < 0xC2 {
+                return Self::fffd();
+            }
+
+            // The lead byte is 110xxxxx
+            // -> Strip off the 110 prefix
+            cp &= !0xE0;
+        } else if cp < 0xF0 {
+            // UTF8-3 =
+            //   %xE0    %xA0-BF   UTF8-tail
+            //   %xE1-EC UTF8-tail UTF8-tail
+            //   %xED    %x80-9F   UTF8-tail
+            //   %xEE-EF UTF8-tail UTF8-tail
+
+            // This is a pretty neat approach seen in ICU4C, because it's a 1:1 translation of the RFC.
+            // I don't understand why others don't do the same thing. It's rather performant.
+            const BITS_80_9F: u8 = 1 << 0b100; // 0x80-9F, aka 0b100xxxxx
+            const BITS_A0_BF: u8 = 1 << 0b101; // 0xA0-BF, aka 0b101xxxxx
+            const BITS_BOTH: u8 = BITS_80_9F | BITS_A0_BF;
+            const LEAD_TRAIL1_BITS: [u8; 16] = [
+                //             v-- lead byte
+                BITS_A0_BF, // 0xE0
+                BITS_BOTH,  // 0xE1
+                BITS_BOTH,  // 0xE2
+                BITS_BOTH,  // 0xE3
+                BITS_BOTH,  // 0xE4
+                BITS_BOTH,  // 0xE5
+                BITS_BOTH,  // 0xE6
+                BITS_BOTH,  // 0xE7
+                BITS_BOTH,  // 0xE8
+                BITS_BOTH,  // 0xE9
+                BITS_BOTH,  // 0xEA
+                BITS_BOTH,  // 0xEB
+                BITS_BOTH,  // 0xEC
+                BITS_80_9F, // 0xED
+                BITS_BOTH,  // 0xEE
+                BITS_BOTH,  // 0xEF
+            ];
+
+            // The lead byte is 1110xxxx
+            // -> Strip off the 1110 prefix
+            cp &= !0xF0;
+
+            let t = self.source[self.offset];
+            if LEAD_TRAIL1_BITS[cp as usize] & (1 << (t >> 5)) == 0 {
+                return Self::fffd();
+            }
+            cp = (cp << 6) | (t as u32 & 0x3F);
+
+            self.offset += 1;
+            if self.offset >= self.source.len() {
+                return Self::fffd();
+            }
+        } else {
+            // UTF8-4 =
+            //   %xF0    %x90-BF   UTF8-tail UTF8-tail
+            //   %xF1-F3 UTF8-tail UTF8-tail UTF8-tail
+            //   %xF4    %x80-8F   UTF8-tail UTF8-tail
+
+            // This is similar to the above, but with the indices flipped:
+            // The trail byte is the index and the lead byte mask is the value.
+            // This is because the split at 0x90 requires more bits than fit into an u8.
+            const TRAIL1_LEAD_BITS: [u8; 16] = [
+                // +------ 0xF4 lead
+                // |+----- 0xF3 lead
+                // ||+---- 0xF2 lead
+                // |||+--- 0xF1 lead
+                // ||||+-- 0xF0 lead
+                // vvvvv
+                0b_00000, //
+                0b_00000, //
+                0b_00000, //
+                0b_00000, //
+                0b_00000, //
+                0b_00000, //
+                0b_00000, // trail bytes:
+                0b_00000, //
+                0b_11110, // 0x80-8F -> 0x80-8F can be preceded by 0xF1-F4
+                0b_01111, // 0x90-9F -v
+                0b_01111, // 0xA0-AF -> 0x90-BF can be preceded by 0xF0-F3
+                0b_01111, // 0xB0-BF -^
+                0b_00000, //
+                0b_00000, //
+                0b_00000, //
+                0b_00000, //
+            ];
+
+            // The lead byte *may* be 11110xxx, but could also be e.g. 11111xxx.
+            // -> Only strip off the 1111 prefix
+            cp &= !0xF0;
+
+            // Now we can verify if it's actually <= 0xF4.
+            if cp > 4 {
+                return Self::fffd();
+            }
+
+            let t = self.source[self.offset];
+            if TRAIL1_LEAD_BITS[(t >> 4) as usize] & (1 << cp) == 0 {
+                return Self::fffd();
+            }
+            cp = (cp << 6) | (t as u32 & 0x3F);
+
+            self.offset += 1;
+            if self.offset >= self.source.len() {
+                return Self::fffd();
+            }
+
+            // UTF8-tail = %x80-BF
+            // Bytes below 0x80 wrap around to huge values and are rejected below.
+            let t = (self.source[self.offset] as u32).wrapping_sub(0x80);
+            if t > 0x3F {
+                return Self::fffd();
+            }
+            cp = (cp << 6) | t;
+
+            self.offset += 1;
+            if self.offset >= self.source.len() {
+                return Self::fffd();
+            }
+        }
+
+        // SAFETY: Every branch above returned early if `offset >= len`.
+        unsafe { hint::assert_unchecked(self.offset < self.source.len()) };
+
+        // UTF8-tail = %x80-BF
+        // Bytes below 0x80 wrap around to huge values and are rejected below.
+        let t = (self.source[self.offset] as u32).wrapping_sub(0x80);
+        if t > 0x3F {
+            return Self::fffd();
+        }
+        cp = (cp << 6) | t;
+
+        self.offset += 1;
+        // SAFETY: The checks above only admit the byte sequences permitted by RFC 3629,
+        // so `cp` is a valid Unicode scalar value (no surrogates, at most U+10FFFF).
+        Some(unsafe { char::from_u32_unchecked(cp) })
+    }
+}
+
+impl iter::FusedIterator for Utf8Chars<'_> {}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_broken_utf8() {
+        let source = [b'a', 0xED, 0xA0, 0x80, b'b'];
+        let mut chars = Utf8Chars::new(&source, 0);
+        let mut offset = 0;
+        for chunk in source.utf8_chunks() {
+            for ch in chunk.valid().chars() {
+                offset += ch.len_utf8();
+                assert_eq!(chars.next(), Some(ch));
+                assert_eq!(chars.offset(), offset);
+            }
+            if !chunk.invalid().is_empty() {
+                offset += chunk.invalid().len();
+                assert_eq!(chars.next(), Some('\u{FFFD}'));
+                assert_eq!(chars.offset(), offset);
+            }
+        }
+    }
+
+    #[test]
+    fn test_ascii_in_tail_position() {
+        // An ASCII byte where a UTF8-tail is expected must yield U+FFFD
+        // (instead of underflowing) and must not be consumed itself.
+        let source = [0xC3, b'a', 0xE2, 0x82, b'b', 0xF0, 0x9F, b'c', 0xF0, 0x9F, 0x98, b'd'];
+        let decoded: String = Utf8Chars::new(&source, 0).collect();
+        assert_eq!(decoded, "\u{FFFD}a\u{FFFD}b\u{FFFD}c\u{FFFD}d");
+    }
+}
diff --git a/src/vt.rs b/src/vt.rs
new file mode 100644
index 0000000..b8578ea
--- /dev/null
+++ b/src/vt.rs
@@ -0,0 +1,342 @@
+use core::time;
+
+use crate::memchr::memchr2;
+
+/// A token produced by [`Stream::next`].
+pub enum Token<'parser, 'input> {
+    /// A run of text that contains no control characters.
+    Text(&'input str),
+    /// A control character (below 0x20, or 0x7f) other than ESC.
+    Ctrl(char),
+    /// The byte that followed an ESC, or `'\0'` for a lone Escape key press.
+    Esc(char),
+    /// The byte that followed an `ESC O` introducer.
+    SS3(char),
+    /// A complete `ESC [` control sequence.
+    Csi(&'parser Csi),
+    /// (A chunk of) the payload of an `ESC ]` sequence.
+    /// `partial` is set while the terminator has not been seen yet.
+    Osc { data: &'input str, partial: bool },
+    /// (A chunk of) the payload of an `ESC P` sequence.
+    /// `partial` is set while the terminator has not been seen yet.
+    Dcs { data: &'input str, partial: bool },
+}
+
+/// The parser state that is carried over from one input to the next.
+#[derive(Clone, Copy)]
+pub enum State {
+    Ground,
+    Esc,
+    Ss3,
+    Csi,
+    Osc,
+    Dcs,
+    OscEsc,
+    DcsEsc,
+}
+
+/// The contents of a parsed CSI sequence.
+pub struct Csi {
+    /// Numeric parameters; each value is clamped to 0xffff.
+    pub params: [i32; 32],
+    /// Number of valid entries in `params` (never more than `params.len()`).
+    pub param_count: usize,
+    /// The private marker byte (`<`, `=`, `>` or `?`), or `'\0'` if there was none.
+    pub private_byte: char,
+    /// The final byte (0x40..=0x7e) that terminated the sequence.
+    pub final_byte: char,
+}
+
+/// An incremental parser that splits terminal input into VT sequences.
+pub struct Parser {
+    state: State,
+    // Csi is not part of State, because it allows us
+    // to more quickly erase and reuse the struct.
+    csi: Csi,
+}
+
+impl Parser {
+    /// Creates a parser in the ground state.
+    pub fn new() -> Self {
+        Self {
+            state: State::Ground,
+            csi: Csi {
+                params: [0; 32],
+                param_count: 0,
+                private_byte: '\0',
+                final_byte: '\0',
+            },
+        }
+    }
+
+    /// Suggests a timeout for the next call to `read()`.
+    ///
+    /// We need this because of the ambiguity of whether a trailing
+    /// escape character in an input is starting another escape sequence or
+    /// is just the result of the user literally pressing the Escape key.
+    pub fn read_timeout(&mut self) -> Option<time::Duration> {
+        match self.state {
+            // 100ms is an upper ceiling for a responsive feel. This uses half that,
+            // under the assumption that a really slow terminal needs equal amounts
+            // of time for I and O. Realistically though, this could be much lower.
+            State::Esc => Some(time::Duration::from_millis(50)),
+            _ => None,
+        }
+    }
+
+    /// Parses the given input into VT sequences.
+    ///
+    /// You should call this function even if your `read()`
+    /// had a timeout (pass an empty string in that case).
+    pub fn parse<'parser, 'input>(
+        &'parser mut self,
+        input: &'input str,
+    ) -> Stream<'parser, 'input> {
+        Stream {
+            parser: self,
+            input,
+            off: 0,
+        }
+    }
+}
+
+/// The tokens of a single input chunk; created by [`Parser::parse`].
+pub struct Stream<'parser, 'input> {
+    parser: &'parser mut Parser,
+    input: &'input str,
+    off: usize,
+}
+
+impl Stream<'_, '_> {
+    /// Reads and consumes raw bytes from the input.
+    pub fn read(&mut self, dst: &mut [u8]) -> usize {
+        let bytes = self.input.as_bytes();
+        let off = self.off.min(bytes.len());
+        let len = dst.len().min(bytes.len() - off);
+        dst[..len].copy_from_slice(&bytes[off..off + len]);
+        self.off += len;
+        len
+    }
+
+    /// Parses the next VT sequence from the previously given input.
+    ///
+    /// Can't implement Iterator, because this is a "lending iterator".
+    pub fn next(&mut self) -> Option<Token<'_, '_>> {
+        let parser = &mut *self.parser;
+        let input = self.input;
+        let bytes = input.as_bytes();
+
+        // If the previous input ended with an escape character, `read_timeout()`
+        // returned `Some(..)` timeout, and if the caller did everything correctly
+        // and there was indeed a timeout, we should be called with an empty
+        // input. In that case we'll return the escape as its own token.
+        if input.is_empty() && matches!(parser.state, State::Esc) {
+            parser.state = State::Ground;
+            return Some(Token::Esc('\0'));
+        }
+
+        while self.off < bytes.len() {
+            match parser.state {
+                State::Ground => match bytes[self.off] {
+                    0x1b => {
+                        parser.state = State::Esc;
+                        self.off += 1;
+                    }
+                    c @ (0x00..0x20 | 0x7f) => {
+                        self.off += 1;
+                        return Some(Token::Ctrl(c as char));
+                    }
+                    _ => {
+                        let beg = self.off;
+                        while {
+                            self.off += 1;
+                            self.off < bytes.len()
+                                && bytes[self.off] >= 0x20
+                                && bytes[self.off] != 0x7f
+                        } {}
+                        return Some(Token::Text(&input[beg..self.off]));
+                    }
+                },
+                State::Esc => {
+                    let c = bytes[self.off];
+                    self.off += 1;
+                    match c {
+                        b'[' => {
+                            parser.state = State::Csi;
+                            parser.csi.private_byte = '\0';
+                            parser.csi.final_byte = '\0';
+                            while parser.csi.param_count > 0 {
+                                parser.csi.param_count -= 1;
+                                parser.csi.params[parser.csi.param_count] = 0;
+                            }
+                        }
+                        b']' => {
+                            parser.state = State::Osc;
+                        }
+                        b'O' => {
+                            parser.state = State::Ss3;
+                        }
+                        b'P' => {
+                            parser.state = State::Dcs;
+                        }
+                        c => {
+                            parser.state = State::Ground;
+                            return Some(Token::Esc(c as char));
+                        }
+                    }
+                }
+                State::Ss3 => {
+                    parser.state = State::Ground;
+                    let c = bytes[self.off];
+                    self.off += 1;
+                    return Some(Token::SS3(c as char));
+                }
+                State::Csi => {
+                    loop {
+                        // If we still have slots left, parse the parameter.
+                        if parser.csi.param_count < parser.csi.params.len() {
+                            let dst = &mut parser.csi.params[parser.csi.param_count];
+                            while self.off < bytes.len()
+                                && bytes[self.off] >= b'0'
+                                && bytes[self.off] <= b'9'
+                            {
+                                let v = *dst * 10 + bytes[self.off] as i32 - b'0' as i32;
+                                *dst = v.min(0xffff);
+                                self.off += 1;
+                            }
+                        } else {
+                            // ...otherwise, skip the parameters until we find the final byte.
+                            while self.off < bytes.len()
+                                && bytes[self.off] >= b'0'
+                                && bytes[self.off] <= b'9'
+                            {
+                                self.off += 1;
+                            }
+                        }
+
+                        // Encountered the end of the input before finding the final byte.
+                        if self.off >= bytes.len() {
+                            return None;
+                        }
+
+                        let c = bytes[self.off];
+                        self.off += 1;
+
+                        match c {
+                            0x40..=0x7e => {
+                                parser.state = State::Ground;
+                                parser.csi.final_byte = c as char;
+                                if parser.csi.param_count != 0 || parser.csi.params[0] != 0 {
+                                    // Parameters beyond the available slots were skipped above,
+                                    // so the count must not exceed the number of slots.
+                                    let count = parser.csi.param_count + 1;
+                                    parser.csi.param_count = count.min(parser.csi.params.len());
+                                }
+                                return Some(Token::Csi(&parser.csi));
+                            }
+                            b';' => parser.csi.param_count += 1,
+                            b'<'..=b'?' => parser.csi.private_byte = c as char,
+                            _ => {}
+                        }
+                    }
+                }
+                State::Osc | State::Dcs => {
+                    let beg = self.off;
+                    let mut data;
+                    let mut partial;
+
+                    loop {
+                        // Find any indication for the end of the OSC/DCS sequence.
+                        self.off = memchr2(b'\x07', b'\x1b', bytes, self.off);
+
+                        data = &input[beg..self.off];
+                        partial = self.off >= bytes.len();
+
+                        // Encountered the end of the input before finding the terminator.
+                        if partial {
+                            break;
+                        }
+
+                        let c = bytes[self.off];
+                        self.off += 1;
+
+                        if c == 0x1b {
+                            // It's only a string terminator if it's followed by \.
+                            // We're at the end so we're saving the state and will continue next time.
+                            if self.off >= bytes.len() {
+                                parser.state = match parser.state {
+                                    State::Osc => State::OscEsc,
+                                    _ => State::DcsEsc,
+                                };
+                                partial = true;
+                                break;
+                            }
+
+                            // False alarm: Not a string terminator.
+                            if bytes[self.off] != b'\\' {
+                                continue;
+                            }
+
+                            self.off += 1;
+                        }
+
+                        break;
+                    }
+
+                    let state = parser.state;
+                    if !partial {
+                        parser.state = State::Ground;
+                    }
+                    // The state may already be `OscEsc`/`DcsEsc` if the input ended on an ESC.
+                    return match state {
+                        State::Osc | State::OscEsc => Some(Token::Osc { data, partial }),
+                        _ => Some(Token::Dcs { data, partial }),
+                    };
+                }
+                State::OscEsc | State::DcsEsc => {
+                    // We were processing an OSC/DCS sequence and the last byte was an escape character.
+                    // It's only a string terminator if it's followed by \ (= "\x1b\\").
+                    if bytes[self.off] == b'\\' {
+                        // It was indeed a string terminator and we can now tell the caller about it.
+                        let state = parser.state;
+
+                        // Consume the terminator (one byte in the previous input and this byte).
+                        parser.state = State::Ground;
+                        self.off += 1;
+
+                        return match state {
+                            State::OscEsc => Some(Token::Osc {
+                                data: "",
+                                partial: false,
+                            }),
+                            _ => Some(Token::Dcs {
+                                data: "",
+                                partial: false,
+                            }),
+                        };
+                    } else {
+                        // False alarm: Not a string terminator.
+                        // We'll return the escape character as a separate token.
+                        // Processing will continue from the current state (`bytes[self.off]`).
+                        parser.state = match parser.state {
+                            State::OscEsc => State::Osc,
+                            _ => State::Dcs,
+                        };
+                        return match parser.state {
+                            State::Osc => Some(Token::Osc {
+                                data: "\x1b",
+                                partial: true,
+                            }),
+                            _ => Some(Token::Dcs {
+                                data: "\x1b",
+                                partial: true,
+                            }),
+                        };
+                    }
+                }
+            }
+        }
+
+        None
+    }
+}
diff --git a/tools/build_release_windows.bat b/tools/build_release_windows.bat
new file mode 100644
index 0000000..5c2a43c
--- /dev/null
+++ b/tools/build_release_windows.bat
@@ -0,0 +1,11 @@
+@echo off
+
+rem Avoid linking with vcruntime140.dll by statically linking everything,
+rem and then explicitly linking with ucrtbase.dll dynamically.
+rem We do this, because vcruntime140.dll is an optional Windows component.
+set RUSTFLAGS=-Ctarget-feature=+crt-static -Clink-args=/DEFAULTLIB:ucrt.lib -Clink-args=/NODEFAULTLIB:vcruntime.lib -Clink-args=/NODEFAULTLIB:msvcrt.lib -Clink-args=/NODEFAULTLIB:libucrt.lib
+
+rem The backtrace code for panics in Rust is almost as large as the entire editor.
+rem = Huge reduction in binary size by removing all that.
+rem cargo build --release -Zbuild-std=std,panic_abort -Zbuild-std-features=panic_immediate_abort %*
+cargo build --release %*
diff --git a/tools/grapheme-table-gen/Cargo.lock b/tools/grapheme-table-gen/Cargo.lock
new file mode 100644
index 0000000..f063166
--- /dev/null
+++ b/tools/grapheme-table-gen/Cargo.lock
@@ -0,0 +1,380 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 4 + +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "anyhow" +version = "1.0.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34ac096ce696dc2fcabef30516bb13c0a68a11d30131d3df6f04711467681b04" + +[[package]] +name = "autocfg" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" + +[[package]] +name = "bumpalo" +version = "3.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" + +[[package]] +name = "cc" +version = "1.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c31a0499c1dc64f458ad13872de75c0eb7e3fdb0e67964610c914b034fc5956e" +dependencies = [ + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "chrono" +version = "0.4.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e36cc9d416881d2e24f9a963be5fb1cd90966419ac844274161d10488b3e825" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "js-sys", + "num-traits", + "wasm-bindgen", + "windows-targets", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "either" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" + +[[package]] +name = "grapheme-table-gen" +version = "0.1.0" +dependencies = [ + "anyhow", + "chrono", + "indoc", + "pico-args", + "rayon", + "roxmltree", +] + +[[package]] +name = "iana-time-zone" +version = "0.1.61" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "235e081f3925a06703c2d0117ea8b91f042756fd6e7a6e5d901e8ca1a996b220" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "indoc" +version = "2.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5" + +[[package]] +name = "js-sys" +version = "0.3.76" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "6717b6b5b077764fb5966237269cb3c64edddde4b14ce42647430a78ced9e7b7" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "libc" +version = "0.2.169" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" + +[[package]] +name = "log" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "once_cell" +version = "1.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" + +[[package]] +name = "pico-args" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5be167a7af36ee22fe3115051bc51f6e6c7054c9348e28deb4f49bd6f705a315" + +[[package]] +name = "proc-macro2" +version = "1.0.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rayon" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.12.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + +[[package]] +name = "roxmltree" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c20b6793b5c2fa6553b250154b78d6d0db37e72700ae35fad9387a46f487c97" + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "syn" +version = "2.0.91" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d53cbcb5a243bd33b7858b1d7f4aca2153490815872d86d955d6ea29f743c035" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83" + +[[package]] +name = "wasm-bindgen" +version = "0.2.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a474f6281d1d70c17ae7aa6a613c87fce69a127e2624002df63dcb39d6cf6396" +dependencies = [ + "cfg-if", + "once_cell", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f89bb38646b4f81674e8f5c3fb81b562be1fd936d84320f3264486418519c79" +dependencies = [ + "bumpalo", + "log", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2cc6181fd9a7492eef6fef1f33961e3695e4579b9872a6f7c83aee556666d4fe" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.99" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30d7a95b763d3c45903ed6c81f156801839e5ee968bb07e534c44df0fcd330c2" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "943aab3fdaaa029a6e0271b35ea10b72b943135afe9bffca82384098ad0e06a6" + +[[package]] +name = "windows-core" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] 
+name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" diff --git a/tools/grapheme-table-gen/Cargo.toml b/tools/grapheme-table-gen/Cargo.toml new file mode 100644 index 0000000..46cf013 --- /dev/null +++ b/tools/grapheme-table-gen/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "grapheme-table-gen" +version = "0.1.0" +edition = "2021" + +[dependencies] +anyhow = "1.0.95" +chrono = "0.4.39" +indoc = "2.0.5" +pico-args = { version = "0.5.0", features = ["eq-separator"] } +rayon = "1.10.0" +roxmltree = { version = "0.20.0", default-features = false, features = ["std"] } diff --git a/tools/grapheme-table-gen/src/main.rs b/tools/grapheme-table-gen/src/main.rs new file mode 100644 index 0000000..3b18b7a --- /dev/null +++ b/tools/grapheme-table-gen/src/main.rs @@ -0,0 +1,850 @@ +mod rules; + +use crate::rules::{JOIN_RULES_GRAPHEME_CLUSTER, JOIN_RULES_LINE_BREAK}; +use anyhow::{bail, Context}; +use indoc::writedoc; +use rayon::prelude::*; +use std::collections::HashMap; +use std::fmt::Write as FmtWrite; +use std::io::Write as IoWrite; +use std::ops::RangeInclusive; +use std::path::PathBuf; + +type TrieType = u32; + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +enum CharacterWidth { + ZeroWidth, + Narrow, + Wide, 
+ Ambiguous, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +enum ClusterBreak { + Other, // GB999 + Extend, // GB9, GB9a -- includes SpacingMark + RI, // GB12, GB13 + Prepend, // GB9b + HangulL, // GB6, GB7, GB8 + HangulV, // GB6, GB7, GB8 + HangulT, // GB6, GB7, GB8 + HangulLV, // GB6, GB7, GB8 + HangulLVT, // GB6, GB7, GB8 + InCBLinker, // GB9c + InCBConsonant, // GB9c + ExtPic, // GB11 + ZWJ, // GB9, GB11 + + // These are intentionally ordered last, as this allows us to + // simplify the ucd_grapheme_cluster_is_newline implementation. + Control, // GB4, GB5 + CR, // GB3, GB4, GB5 + LF, // GB3, GB4, GB5 +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +#[allow(non_camel_case_types)] +enum LineBreak { + Other, // Anything else + + // Non-tailorable Line Breaking Classes + WordJoiner, // WJ + ZeroWidthSpace, // ZW + Glue, // GL + Space, // SP + + // Break Opportunities + BreakAfter, // BA + BreakBefore, // BB + Hyphen, // HY + + // Characters Prohibiting Certain Breaks + ClosePunctuation, // CL + CloseParenthesis_EA, // CP, East Asian + CloseParenthesis_NotEA, // CP, not East Asian + Exclamation, // EX + Inseparable, // IN + Nonstarter, // NS + OpenPunctuation_EA, // OP, East Asian + OpenPunctuation_NotEA, // OP, not East Asian + Quotation, // QU + + // Numeric Context + InfixNumericSeparator, // IS + Numeric, // NU + PostfixNumeric, // PO + PrefixNumeric, // PR + SymbolsAllowingBreakAfter, // SY + + // Other Characters + Alphabetic, // AL & HL + Ideographic, // ID & EB & EM +} + +#[derive(Clone, Default)] +struct Ucd { + description: String, + values: Vec, +} + +#[derive(Clone, Default)] +struct Stage { + values: Vec, + index: usize, + shift: usize, + mask: usize, + bits: usize, +} + +#[derive(Clone, Default)] +struct Trie { + stages: Vec, + total_size: usize, +} + +#[derive(Clone, Copy, Default)] +enum Language { + #[default] + C, + Rust, +} + +#[derive(Default)] +struct Output { + arg_lang: Language, + arg_no_ambiguous: bool, + 
arg_line_breaks: bool, + + ucd: Ucd, + trie: Trie, + rules_gc: [Vec; 2], + rules_lb: Vec, + total_size: usize, +} + +const HELP: &str = "\ +Usage: grapheme-table-gen [options...] + -h, --help Prints help information + --lang= Output language (default: c) + --no-ambiguous Treat all ambiguous characters as narrow + --line-breaks Store and expose line break information +"; + +fn main() -> anyhow::Result<()> { + let mut args = pico_args::Arguments::from_env(); + if args.contains(["-h", "--help"]) { + eprint!("{}", HELP); + return Ok(()); + } + + let mut out = Output { + arg_lang: args.value_from_fn("--lang", |arg| match arg { + "c" => Ok(Language::C), + "rust" => Ok(Language::Rust), + l => bail!("invalid language: \"{}\"", l), + })?, + arg_no_ambiguous: args.contains("--no-ambiguous"), + arg_line_breaks: args.contains("--line-breaks"), + ..Default::default() + }; + let arg_input = args.free_from_os_str(|s| -> Result { Ok(s.into()) })?; + let arg_remaining = args.finish(); + if !arg_remaining.is_empty() { + bail!("unrecognized arguments: {:?}", arg_remaining); + } + + let input = std::fs::read_to_string(arg_input)?; + let doc = roxmltree::Document::parse(&input)?; + out.ucd = extract_values_from_ucd(&doc, &out)?; + + // Find the best trie configuration over the given block sizes (2^2 - 2^8) and stages (4). + // More stages = Less size. The trajectory roughly follows a+b*c^stages, where c < 1. + // 4 still gives ~30% savings over 3 stages and going beyond 5 gives diminishing returns (<10%). + out.trie = build_best_trie(&out.ucd.values, 2, 8, 4); + // The joinRules above has 2 bits per value. This packs it into 32-bit integers to save space. + out.rules_gc = JOIN_RULES_GRAPHEME_CLUSTER + .map(|t| t.iter().map(|row| prepare_rules_row(row, 2, 3)).collect()); + out.rules_lb = JOIN_RULES_LINE_BREAK + .iter() + .map(|row| prepare_rules_row(row, 1, 0)) + .collect(); + + // Each rules item has the same length. Each item is 32 bits = 4 bytes. 
+ out.total_size = out.trie.total_size + out.rules_gc.len() * out.rules_gc[0].len() * 4; + if out.arg_line_breaks { + out.total_size += out.rules_lb.len() * 4; + } + + // Run a quick sanity check to ensure that the trie works as expected. + for (cp, &expected) in out.ucd.values.iter().enumerate() { + let mut actual = 0; + for s in &out.trie.stages { + actual = s.values[actual as usize + ((cp >> s.shift) & s.mask)]; + } + assert_eq!( + expected, actual, + "trie sanity check failed for U+{:04X}", + cp + ); + } + + let buf = match out.arg_lang { + Language::C => generate_c(out), + Language::Rust => generate_rust(out), + }; + + std::io::stdout().write_all(buf.as_bytes())?; + Ok(()) +} + +impl Output { + fn args(&self) -> String { + let mut buf = String::new(); + match self.arg_lang { + Language::C => buf.push_str("--lang=c"), + Language::Rust => buf.push_str("--lang=rust"), + } + if self.arg_no_ambiguous { + buf.push_str(" --no-ambiguous") + } + if self.arg_line_breaks { + buf.push_str(" --line-breaks") + } + buf + } +} + +fn generate_c(out: Output) -> String { + let mut buf = String::new(); + + _ = writedoc!( + buf, + " + // BEGIN: Generated by grapheme-table-gen on {}, from {}, with {}, {} bytes + // clang-format off + ", + chrono::Utc::now().to_rfc3339_opts(chrono::SecondsFormat::Secs, true), + out.ucd.description, + out.args(), + out.total_size, + ); + + for stage in &out.trie.stages { + let mut width = 16; + if stage.index != 0 { + width = stage.mask + 1; + } + + _ = write!( + buf, + "static const uint{}_t s_stage{}[] = {{", + stage.bits, stage.index + ); + for (j, &value) in stage.values.iter().enumerate() { + if j % width == 0 { + buf.push_str("\n "); + } + _ = write!(buf, " 0x{:01$x},", value, stage.bits / 4); + } + buf.push_str("\n};\n"); + } + + _ = writeln!( + buf, + "static const uint32_t s_grapheme_cluster_join_rules[{}][{}] = {{", + out.rules_gc.len(), + out.rules_gc[0].len() + ); + for table in &out.rules_gc { + buf.push_str(" {\n"); + for &r in table { 
+ _ = writeln!(buf, " 0b{:032b},", r); + } + buf.push_str(" },\n"); + } + buf.push_str("};\n"); + + if out.arg_line_breaks { + _ = writeln!( + buf, + "static const uint32_t s_line_break_join_rules[{}] = {{", + out.rules_lb.len() + ); + for r in &out.rules_lb { + _ = writeln!(buf, " 0b{r:032b},"); + } + buf.push_str("};\n"); + } + + buf.push_str("inline int ucd_grapheme_cluster_lookup(const uint32_t cp)\n{\n"); + for stage in &out.trie.stages { + if stage.index == 0 { + _ = writeln!( + buf, + " const uint{}_t s0 = s_stage0[cp >> {}];", + stage.bits, stage.shift, + ); + } else { + _ = writeln!( + buf, + " const uint{}_t s{} = s_stage{}[s{} + ((cp >> {}) & {})];", + stage.bits, + stage.index, + stage.index, + stage.index - 1, + stage.shift, + stage.mask, + ); + } + } + _ = writeln!(buf, " return s{};", out.trie.stages.len() - 1); + buf.push_str("}\n"); + + _ = writedoc!( + buf, + " + inline int ucd_grapheme_cluster_joins(const int state, const int lead, const int trail) + {{ + const int l = lead & 15; + const int t = trail & 15; + return (s_grapheme_cluster_join_rules[state][l] >> (t * 2)) & 3; + }} + inline bool ucd_grapheme_cluster_joins_done(const int state) + {{ + return state == 3; + }} + inline int ucd_grapheme_cluster_character_width(const int val) + {{ + return (val >> 4) & 3; + }} + inline bool ucd_grapheme_cluster_is_newline(const int val) + {{ + return (val & 15) > {}; + }} + ", + ClusterBreak::Control as u32, + ); + + if out.arg_line_breaks { + _ = writedoc!( + buf, + " + inline bool ucd_line_break_joins(const int lead, const int trail) + {{ + const int l = lead >> 6; + const int t = trail >> 6; + return (s_line_break_join_rules[l] >> t) & 1; + }} + ", + ); + } + + buf.push_str("// clang-format on\n// END: Generated by grapheme-table-gen\n"); + buf +} + +fn generate_rust(out: Output) -> String { + let mut buf = String::new(); + + _ = writeln!( + buf, + "// BEGIN: Generated by grapheme-table-gen on {}, from {}, with {}, {} bytes", + 
chrono::Utc::now().to_rfc3339_opts(chrono::SecondsFormat::Secs, true), + out.ucd.description, + out.args(), + out.total_size, + ); + + for stage in &out.trie.stages { + let mut width = 16; + if stage.index != 0 { + width = stage.mask + 1; + } + + _ = write!( + buf, + "#[rustfmt::skip]\npub const STAGE{}: [u{}; {}] = [", + stage.index, + stage.bits, + stage.values.len(), + ); + for (j, &value) in stage.values.iter().enumerate() { + if j % width == 0 { + buf.push_str("\n "); + } + _ = write!(buf, " 0x{:01$x},", value, stage.bits / 4); + } + buf.push_str("\n];\n"); + } + + _ = writeln!( + buf, + "#[rustfmt::skip]\npub const GRAPHEME_JOIN_RULES: [[u32; {}]; {}] = [", + out.rules_gc[0].len(), + out.rules_gc.len(), + ); + for table in &out.rules_gc { + buf.push_str(" [\n"); + for &r in table { + _ = writeln!(buf, " 0b{:032b},", r); + } + buf.push_str(" ],\n"); + } + buf.push_str("];\n"); + + if out.arg_line_breaks { + _ = writeln!( + buf, + "#[rustfmt::skip]\npub const LINE_BREAK_JOIN_RULES: [u32; {}] = [", + out.rules_lb.len(), + ); + for r in &out.rules_lb { + _ = writeln!(buf, " 0b{r:032b},"); + } + buf.push_str("];\n"); + } + + _ = writedoc!( + buf, + " + #[inline(always)] + pub fn ucd_grapheme_cluster_lookup(cp: char) -> usize {{ + let cp = cp as usize; + ", + ); + for stage in &out.trie.stages { + if stage.index == 0 { + _ = writeln!( + buf, + " let s = STAGE{}[cp >> {}] as usize;", + stage.index, stage.shift, + ); + } else if stage.index != out.trie.stages.len() - 1 { + _ = writeln!( + buf, + " let s = STAGE{}[s + ((cp >> {}) & {})] as usize;", + stage.index, stage.shift, stage.mask, + ); + } else { + _ = writeln!( + buf, + " STAGE{}[s + ((cp >> {}) & {})] as usize", + stage.index, stage.shift, stage.mask, + ); + } + } + buf.push_str("}\n"); + + _ = writedoc!( + buf, + " + #[inline(always)] + pub fn ucd_grapheme_cluster_joins(state: u32, lead: usize, trail: usize) -> u32 {{ + let l = lead & 15; + let t = trail & 15; + (GRAPHEME_JOIN_RULES[state as usize][l] >> (t * 2)) & 3 + }} +
#[inline(always)] + pub fn ucd_grapheme_cluster_joins_done(state: u32) -> bool {{ + state == 3 + }} + #[inline(always)] + pub fn ucd_grapheme_cluster_character_width(val: usize) -> usize {{ + (val >> 4) & 3 + }} + #[inline(always)] + pub fn ucd_grapheme_cluster_is_newline(val: usize) -> bool {{ + (val & 15) > {} + }} + ", + ClusterBreak::Control as u32, + ); + + if out.arg_line_breaks { + _ = writedoc!( + buf, + " + #[inline(always)] + pub fn ucd_line_break_joins(lead: usize, trail: usize) -> bool {{ + let l = lead >> 6; + let t = trail >> 6; + ((LINE_BREAK_JOIN_RULES[l] >> t) & 1) != 0 + }} + ", + ); + } + + buf.push_str("// END: Generated by grapheme-table-gen\n"); + buf +} + +fn extract_values_from_ucd(doc: &roxmltree::Document, out: &Output) -> anyhow::Result { + let ambiguous_value = if out.arg_no_ambiguous { + CharacterWidth::Narrow + } else { + CharacterWidth::Ambiguous + }; + let mut values = vec![ + trie_value( + ClusterBreak::Other, + CharacterWidth::Narrow, + LineBreak::Other + ); + 1114112 + ]; + + let ns = "http://www.unicode.org/ns/2003/ucd/1.0"; + let root = doc.root_element(); + let description = root + .children() + .find(|n| n.has_tag_name((ns, "description"))) + .context("missing ucd description")?; + let repertoire = root + .children() + .find(|n| n.has_tag_name((ns, "repertoire"))) + .context("missing ucd repertoire")?; + let description = description.text().unwrap_or_default().to_string(); + + for group in repertoire.children().filter(|n| n.is_element()) { + const DEFAULT_ATTRIBUTES: UcdAttributes = UcdAttributes { + general_category: "", + line_break: "", + grapheme_cluster_break: "", + indic_conjunct_break: "", + extended_pictographic: "", + east_asian: "", + }; + let group_attributes = extract_attributes(&group, &DEFAULT_ATTRIBUTES); + + for char in group.children().filter(|n| n.is_element()) { + let char_attributes = extract_attributes(&char, &group_attributes); + let range = extract_range(&char); + + let mut cb = match 
char_attributes.grapheme_cluster_break { + "XX" => ClusterBreak::Other, // Anything else + // We ignore GB3 which demands that CR × LF do not break apart, because + // * these control characters won't normally reach our text storage + // * otherwise we're in a raw write mode and historically conhost stores them in separate cells + "CR" => ClusterBreak::CR, // Carriage Return + "LF" => ClusterBreak::LF, // Line Feed + "CN" => ClusterBreak::Control, // Control + "EX" | "SM" => ClusterBreak::Extend, // Extend, SpacingMark + "PP" => ClusterBreak::Prepend, // Prepend + "ZWJ" => ClusterBreak::ZWJ, // Zero Width Joiner + "RI" => ClusterBreak::RI, // Regional Indicator + "L" => ClusterBreak::HangulL, // Hangul Syllable Type L + "V" => ClusterBreak::HangulV, // Hangul Syllable Type V + "T" => ClusterBreak::HangulT, // Hangul Syllable Type T + "LV" => ClusterBreak::HangulLV, // Hangul Syllable Type LV + "LVT" => ClusterBreak::HangulLVT, // Hangul Syllable Type LVT + _ => bail!( + "Unrecognized GCB {:?} for U+{:04X} to U+{:04X}", + char_attributes.grapheme_cluster_break, + range.start(), + range.end() + ), + }; + + if char_attributes.extended_pictographic == "Y" { + // Currently every single Extended_Pictographic codepoint happens to be GCB=XX. + // This is fantastic for us because it means we can stuff it into the ClusterBreak enum + // and treat it as an alias of EXTEND, but with the special GB11 properties. 
+ if cb != ClusterBreak::Other { + bail!( + "Unexpected GCB {:?} with ExtPict=Y for U+{:04X} to U+{:04X}", + char_attributes.grapheme_cluster_break, + range.start(), + range.end() + ); + } + + cb = ClusterBreak::ExtPic; + } + + cb = match char_attributes.indic_conjunct_break { + "None" | "Extend" => cb, + "Linker" => ClusterBreak::InCBLinker, + "Consonant" => ClusterBreak::InCBConsonant, + _ => bail!( + "Unrecognized InCB {:?} for U+{:04X} to U+{:04X}", + char_attributes.indic_conjunct_break, + range.start(), + range.end() + ), + }; + + let mut width = match char_attributes.east_asian { + "N" | "Na" | "H" => CharacterWidth::Narrow, // Half-width, Narrow, Neutral + "F" | "W" => CharacterWidth::Wide, // Wide, Full-width + "A" => ambiguous_value, // Ambiguous + _ => bail!( + "Unrecognized ea {:?} for U+{:04X} to U+{:04X}", + char_attributes.east_asian, + range.start(), + range.end() + ), + }; + + // There's no "ea" attribute for "zero width" so we need to do that ourselves. This matches: + // Me: Mark, enclosing + // Mn: Mark, non-spacing + // Cf: Control, format + match char_attributes.general_category { + "Cf" if cb == ClusterBreak::Control => { + // A significant portion of Cf characters are not just gc=Cf (= commonly considered zero-width), + // but also GCB=CN (= does not join). This is a bit of a problem for terminals, + // because they don't support zero-width graphemes, as zero-width columns can't exist. + // So, we turn all of them into Extend, which is roughly how wcswidth() would treat them. 
+ cb = ClusterBreak::Extend; + width = CharacterWidth::ZeroWidth; + } + "Me" | "Mn" | "Cf" => { + width = CharacterWidth::ZeroWidth; + } + _ => {} + }; + + let lb = if out.arg_line_breaks { + let lb_ea = matches!(char_attributes.east_asian, "F" | "W" | "H"); + match char_attributes.line_break { + "WJ" => LineBreak::WordJoiner, + "ZW" => LineBreak::ZeroWidthSpace, + "GL" => LineBreak::Glue, + "SP" => LineBreak::Space, + + "BA" => LineBreak::BreakAfter, + "BB" => LineBreak::BreakBefore, + "HY" => LineBreak::Hyphen, + + "CL" => LineBreak::ClosePunctuation, + "CP" if lb_ea => LineBreak::CloseParenthesis_EA, + "CP" => LineBreak::CloseParenthesis_NotEA, + "EX" => LineBreak::Exclamation, + "IN" => LineBreak::Inseparable, + "NS" => LineBreak::Nonstarter, + "OP" if lb_ea => LineBreak::OpenPunctuation_EA, + "OP" => LineBreak::OpenPunctuation_NotEA, + "QU" => LineBreak::Quotation, + + "IS" => LineBreak::InfixNumericSeparator, + "NU" => LineBreak::Numeric, + "PO" => LineBreak::PostfixNumeric, + "PR" => LineBreak::PrefixNumeric, + "SY" => LineBreak::SymbolsAllowingBreakAfter, + + "AL" | "HL" => LineBreak::Alphabetic, + "ID" | "EB" | "EM" => LineBreak::Ideographic, + + _ => LineBreak::Other, + } + } else { + LineBreak::Other + }; + + values[range].fill(trie_value(cb, width, lb)); + } + } + + // U+00AD: Soft Hyphen + // A soft hyphen is a hint that a word break is allowed at that position. + // By default, the glyph is supposed to be invisible, and only if + // a word break occurs, the text renderer should display a hyphen. + // A terminal does not support computerized typesetting, but unlike the other + // gc=Cf cases we give it a Narrow width, because that matches wcswidth(). + values[0x00AD] = trie_value_mod_width(values[0x00AD], CharacterWidth::Narrow); + + // U+2500 to U+257F: Box Drawing block + // U+2580 to U+259F: Block Elements block + // By default, CharacterWidth.Ambiguous, but by convention .Narrow in terminals. 
+ // + // Most of these characters are LineBreak.Other, but some are actually LineBreak.Alphabetic. + // But to us this doesn't really matter much, because it doesn't make much sense anyway that + // a light double dash is "alphabetic" while a light triple dash is not. + values[0x2500..=0x259F].fill(trie_value( + ClusterBreak::Other, + CharacterWidth::Narrow, + LineBreak::Other, + )); + + // U+FE0F Variation Selector-16 is used to turn unqualified Emojis into qualified ones. + // By convention, this turns them from being ambiguous width (= narrow) into wide ones. + // We achieve this here by explicitly giving this codepoint a wide width. + // Later down below we'll clamp width back to <= 2. + // + // U+FE0F actually has a LineBreak property of CM (Combining Mark), + // but for us that's equivalent to Other. + values[0xFE0F] = trie_value_mod_width(values[0xFE0F], CharacterWidth::Wide); + + Ok(Ucd { + description, + values, + }) +} + +struct UcdAttributes<'a> { + general_category: &'a str, + line_break: &'a str, + grapheme_cluster_break: &'a str, + indic_conjunct_break: &'a str, + extended_pictographic: &'a str, + east_asian: &'a str, +} + +fn extract_attributes<'a>( + node: &'a roxmltree::Node, + default: &'a UcdAttributes, +) -> UcdAttributes<'a> { + UcdAttributes { + general_category: node.attribute("gc").unwrap_or(default.general_category), + line_break: node.attribute("lb").unwrap_or(default.line_break), + grapheme_cluster_break: node + .attribute("GCB") + .unwrap_or(default.grapheme_cluster_break), + indic_conjunct_break: node + .attribute("InCB") + .unwrap_or(default.indic_conjunct_break), + extended_pictographic: node + .attribute("ExtPict") + .unwrap_or(default.extended_pictographic), + east_asian: node.attribute("ea").unwrap_or(default.east_asian), + } +} + +fn extract_range(node: &roxmltree::Node) -> RangeInclusive { + let (first, last) = match node.attribute("cp") { + Some(val) => { + let cp = usize::from_str_radix(val, 16).unwrap(); + (cp, cp) + } + None 
=> ( + usize::from_str_radix(node.attribute("first-cp").unwrap_or("0"), 16).unwrap(), + usize::from_str_radix(node.attribute("last-cp").unwrap_or("0"), 16).unwrap(), + ), + }; + first..=last +} + +fn trie_value(cb: ClusterBreak, width: CharacterWidth, lb: LineBreak) -> TrieType { + let cb = cb as TrieType; + let width = (width as TrieType) << 4; + let lb = (lb as TrieType) << 6; + cb | width | lb +} + +fn trie_value_mod_width(value: TrieType, width: CharacterWidth) -> TrieType { + let value = value & !(3 << 4); // mask out the width bits + let width = (width as TrieType) << 4; + value | width +} + +fn build_best_trie( + uncompressed: &[TrieType], + min_shift: usize, + max_shift: usize, + stages: usize, +) -> Trie { + let depth = stages - 1; + let delta = max_shift - min_shift + 1; + let total = delta.pow(depth as u32); + + let mut tasks = Vec::new(); + for i in 0..total { + let mut shifts = vec![0; depth]; + let mut index = i; + for s in &mut shifts { + *s = min_shift + (index % delta); + index /= delta; + } + tasks.push(shifts); + } + + tasks + .par_iter() + .map(|shifts| build_trie(uncompressed.to_vec(), shifts)) + .min_by_key(|t| t.total_size) + .unwrap() +} + +fn build_trie(mut uncompressed: Vec, shifts: &[usize]) -> Trie { + let mut cumulative_shift = 0; + let mut stages = Vec::new(); + + for &shift in shifts.iter() { + let chunk_size = 1 << shift; + let mut cache = HashMap::new(); + let mut compressed = Vec::new(); + let mut offsets = Vec::new(); + + for off in (0..uncompressed.len()).step_by(chunk_size) { + let chunk = &uncompressed[off..off + chunk_size.min(uncompressed.len() - off)]; + let offset = cache.entry(chunk).or_insert_with(|| { + if let Some(existing) = find_existing(&compressed, chunk) { + existing as TrieType + } else { + let overlap = measure_overlap(&compressed, chunk); + compressed.extend_from_slice(&chunk[overlap..]); + (compressed.len() - chunk.len()) as TrieType + } + }); + offsets.push(*offset); + } + + stages.push(Stage { + values: 
compressed, + index: shifts.len() - stages.len(), + shift: cumulative_shift, + mask: chunk_size - 1, + bits: 0, + }); + + uncompressed = offsets; + cumulative_shift += shift; + } + + stages.push(Stage { + values: uncompressed, + index: 0, + shift: cumulative_shift, + mask: usize::MAX, + bits: 0, + }); + + stages.reverse(); + + for stage in stages.iter_mut() { + let max_val = stage.values.iter().max().cloned().unwrap_or(0); + stage.bits = match max_val { + 0..0x100 => 8, + 0x100..0x10000 => 16, + _ => 32, + }; + } + + let total_size: usize = stages + .iter() + .map(|stage| (stage.bits / 8) * stage.values.len()) + .sum(); + + Trie { stages, total_size } +} + +fn find_existing(haystack: &[TrieType], needle: &[TrieType]) -> Option { + haystack + .windows(needle.len()) + .position(|window| window == needle) +} + +fn measure_overlap(prev: &[TrieType], next: &[TrieType]) -> usize { + (0..prev.len().min(next.len())) + .rev() + .find(|&i| prev[prev.len() - i..] == next[..i]) + .unwrap_or(0) +} + +fn prepare_rules_row(row: &[i32], bit_width: usize, non_joiner_value: i32) -> u32 { + row.iter().enumerate().fold(0u32, |acc, (trail, &value)| { + let value = if value < 0 { non_joiner_value } else { value }; + acc | ((value as u32) << (trail * bit_width)) + }) +} diff --git a/tools/grapheme-table-gen/src/rules.rs b/tools/grapheme-table-gen/src/rules.rs new file mode 100644 index 0000000..daabcbd --- /dev/null +++ b/tools/grapheme-table-gen/src/rules.rs @@ -0,0 +1,279 @@ +// Used as an indicator in our rules for ÷ ("does not join"). +// Underscore is one of the few characters that are permitted as an identifier, +// are monospace in most fonts and also visually distinct from the digits. +const X: i32 = -1; + +// The following rules are based on the Grapheme Cluster Boundaries section of Unicode Standard Annex #29, +// but slightly modified to allow for use with a plain MxN lookup table. +// +// Break at the start and end of text, unless the text is empty. 
+// GB1: ~ sot ÷ Any +// GB2: ~ Any ÷ eot +// Handled by our ucd_* functions. +// +// Do not break between a CR and LF. Otherwise, break before and after controls. +// GB3: ✓ CR × LF +// GB4: ✓ (Control | CR | LF) ÷ +// GB5: ✓ ÷ (Control | CR | LF) +// +// Do not break Hangul syllable or other conjoining sequences. +// GB6: ✓ L × (L | V | LV | LVT) +// GB7: ✓ (LV | V) × (V | T) +// GB8: ✓ (LVT | T) × T +// +// Do not break before extending characters or ZWJ. +// GB9: ✓ × (Extend | ZWJ) +// +// Do not break before SpacingMarks, or after Prepend characters. +// GB9a: ✓ × SpacingMark +// GB9b: ✓ Prepend × +// +// Do not break within certain combinations with Indic_Conjunct_Break (InCB)=Linker. +// GB9c: ~ \p{InCB=Linker} × \p{InCB=Consonant} +// × \p{InCB=Linker} +// modified from +// \p{InCB=Consonant} [ \p{InCB=Extend} \p{InCB=Linker} ]* \p{InCB=Linker} [ \p{InCB=Extend} \p{InCB=Linker} ]* × \p{InCB=Consonant} +// because this has almost the same effect from what I can tell for most text, and greatly simplifies our design. +// +// Do not break within emoji modifier sequences or emoji zwj sequences. +// GB11: ~ ZWJ × \p{Extended_Pictographic} modified from \p{Extended_Pictographic} Extend* ZWJ × \p{Extended_Pictographic} +// because this allows us to use LUTs, while working for most valid text. +// +// Do not break within emoji flag sequences. That is, do not break between regional indicator (RI) symbols if there is an odd number of RI characters before the break point. +// GB12: ~ sot (RI RI)* RI × RI +// GB13: ~ [^RI] (RI RI)* RI × RI +// the lookup table we generate supports RIs via something akin to RI ÷ RI × RI ÷ RI, but the corresponding +// grapheme cluster algorithm doesn't count them. It would need to be updated to recognize and special-case RIs. +// +// Otherwise, break everywhere. 
+// GB999: ✓ Any ÷ Any +// +// This is a great reference for the resulting table: +// https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.html +#[rustfmt::skip] +pub const JOIN_RULES_GRAPHEME_CLUSTER: [[[i32; 16]; 16]; 2] = [ + // Base table + [ + /* ↓ leading → trailing codepoint */ + /* | Other | Extend | RI | Prepend | HangulL | HangulV | HangulT | HangulLV | HangulLVT | InCBLinker | InCBConsonant | ExtPic | ZWJ | Control | CR | LF | */ + /* Other | */ [X /* | */, 0 /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, 0 /* | */, X /* | */, X /* | */, 0 /* | */, X /* | */, X /* | */, X /* | */], + /* Extend | */ [X /* | */, 0 /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, 0 /* | */, X /* | */, X /* | */, 0 /* | */, X /* | */, X /* | */, X /* | */], + /* RI | */ [X /* | */, 0 /* | */, 1 /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, 0 /* | */, X /* | */, X /* | */, 0 /* | */, X /* | */, X /* | */, X /* | */], + /* Prepend | */ [0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, X /* | */, X /* | */, X /* | */], + /* HangulL | */ [X /* | */, 0 /* | */, X /* | */, X /* | */, 0 /* | */, 0 /* | */, X /* | */, 0 /* | */, 0 /* | */, 0 /* | */, X /* | */, X /* | */, 0 /* | */, X /* | */, X /* | */, X /* | */], + /* HangulV | */ [X /* | */, 0 /* | */, X /* | */, X /* | */, X /* | */, 0 /* | */, 0 /* | */, X /* | */, X /* | */, 0 /* | */, X /* | */, X /* | */, 0 /* | */, X /* | */, X /* | */, X /* | */], + /* HangulT | */ [X /* | */, 0 /* | */, X /* | */, X /* | */, X /* | */, X /* | */, 0 /* | */, X /* | */, X /* | */, 0 /* | */, X /* | */, X /* | */, 0 /* | */, X /* | */, X /* | */, X /* | */], + /* HangulLV | */ [X /* | */, 0 /* | */, X /* | */, X /* | */, X /* | */, 0 /* | */, 0 /* | */, X /* | */, X /* | */, 0 /* | */, X /* | */, X /* | */, 0 /* | */, X /* | 
*/, X /* | */, X /* | */], + /* HangulLVT | */ [X /* | */, 0 /* | */, X /* | */, X /* | */, X /* | */, X /* | */, 0 /* | */, X /* | */, X /* | */, 0 /* | */, X /* | */, X /* | */, 0 /* | */, X /* | */, X /* | */, X /* | */], + /* InCBLinker | */ [X /* | */, 0 /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, 0 /* | */, 0 /* | */, X /* | */, 0 /* | */, X /* | */, X /* | */, X /* | */], + /* InCBConsonant | */ [X /* | */, 0 /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, 0 /* | */, X /* | */, X /* | */, 0 /* | */, X /* | */, X /* | */, X /* | */], + /* ExtPic | */ [X /* | */, 0 /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, 0 /* | */, X /* | */, X /* | */, 0 /* | */, X /* | */, X /* | */, X /* | */], + /* ZWJ | */ [X /* | */, 0 /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, 0 /* | */, X /* | */, 0 /* | */, 0 /* | */, X /* | */, X /* | */, X /* | */], + /* Control | */ [X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */], + /* CR | */ [X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, 0 /* | */], + /* LF | */ [X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */], + ], + // Once we have encountered a Regional Indicator pair we'll enter this table. + // It's a copy of the base table, but instead of RI × RI, we're RI ÷ RI. 
+ [ + /* ↓ leading → trailing codepoint */ + /* | Other | Extend | RI | Prepend | HangulL | HangulV | HangulT | HangulLV | HangulLVT | InCBLinker | InCBConsonant | ExtPic | ZWJ | Control | CR | LF | */ + /* Other | */ [X /* | */, 0 /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, 0 /* | */, X /* | */, X /* | */, 0 /* | */, X /* | */, X /* | */, X /* | */], + /* Extend | */ [X /* | */, 0 /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, 0 /* | */, X /* | */, X /* | */, 0 /* | */, X /* | */, X /* | */, X /* | */], + /* RI | */ [X /* | */, 0 /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, 0 /* | */, X /* | */, X /* | */, 0 /* | */, X /* | */, X /* | */, X /* | */], + /* Prepend | */ [0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, 0 /* | */, X /* | */, X /* | */, X /* | */], + /* HangulL | */ [X /* | */, 0 /* | */, X /* | */, X /* | */, 0 /* | */, 0 /* | */, X /* | */, 0 /* | */, 0 /* | */, 0 /* | */, X /* | */, X /* | */, 0 /* | */, X /* | */, X /* | */, X /* | */], + /* HangulV | */ [X /* | */, 0 /* | */, X /* | */, X /* | */, X /* | */, 0 /* | */, 0 /* | */, X /* | */, X /* | */, 0 /* | */, X /* | */, X /* | */, 0 /* | */, X /* | */, X /* | */, X /* | */], + /* HangulT | */ [X /* | */, 0 /* | */, X /* | */, X /* | */, X /* | */, X /* | */, 0 /* | */, X /* | */, X /* | */, 0 /* | */, X /* | */, X /* | */, 0 /* | */, X /* | */, X /* | */, X /* | */], + /* HangulLV | */ [X /* | */, 0 /* | */, X /* | */, X /* | */, X /* | */, 0 /* | */, 0 /* | */, X /* | */, X /* | */, 0 /* | */, X /* | */, X /* | */, 0 /* | */, X /* | */, X /* | */, X /* | */],
+ /* HangulLVT | */ [X /* | */, 0 /* | */, X /* | */, X /* | */, X /* | */, X /* | */, 0 /* | */, X /* | */, X /* | */, 0 /* | */, X /* | */, X /* | */, 0 /* | */, X /* | */, X /* | */, X /* | */], + /* InCBLinker | */ [X /* | */, 0 /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, 0 /* | */, 0 /* | */, X /* | */, 0 /* | */, X /* | */, X /* | */, X /* | */], + /* InCBConsonant | */ [X /* | */, 0 /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, 0 /* | */, X /* | */, X /* | */, 0 /* | */, X /* | */, X /* | */, X /* | */], + /* ExtPic | */ [X /* | */, 0 /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, 0 /* | */, X /* | */, X /* | */, 0 /* | */, X /* | */, X /* | */, X /* | */], + /* ZWJ | */ [X /* | */, 0 /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, 0 /* | */, X /* | */, 0 /* | */, 0 /* | */, X /* | */, X /* | */, X /* | */], + /* Control | */ [X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */], + /* CR | */ [X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, 0 /* | */], + /* LF | */ [X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */], + ], +]; + +// The following rules are based on Unicode Standard Annex #14: Line Breaking Properties, +// but heavily modified to allow for use with lookup tables. +// +// NOTE: If you convert these rules into a lookup table, you must apply them in reverse order. +// This is because the rules are ordered from most to least important (e.g. LB8 overrides LB18). +// +// Resolve line breaking classes: +// LB1: Assign a line breaking class [...]. +// ✗ Unicode does that for us via the "lb" attribute. +// +// Start and end of text: +// LB2: Never break at the start of text. +// ~ Functionality not needed. +// LB3: Always break at the end of text.
+// ~ Functionality not needed. +// +// Mandatory breaks: +// LB4: Always break after hard line breaks. +// ~ Handled by our ucd_* functions. +// LB5: Treat CR followed by LF, as well as CR, LF, and NL as hard line breaks. +// ~ Handled by our ucd_* functions. +// LB6: Do not break before hard line breaks. +// ~ Handled by our ucd_* functions. +// +// Explicit breaks and non-breaks: +// LB7: Do not break before spaces or zero width space. +// ✗ It's way simpler to treat spaces as if they always break. +// LB8: Break before any character following a zero-width space, even if one or more spaces intervene. +// ~ ZW ÷ modified from ZW SP* ÷ because it's not worth being this pedantic about accuracy here. +// LB8a: Do not break after a zero width joiner. +// ~ Our ucd_* functions never break within grapheme clusters. +// +// Combining marks: +// LB9: Do not break a combining character sequence; treat it as if it has the line breaking class of the base character in all of the following rules. Treat ZWJ as if it were CM. +// ~ Our ucd_* functions never break within grapheme clusters. +// LB10: Treat any remaining combining mark or ZWJ as AL. +// ✗ To be honest, I'm not entirely sure I understand the implications of this rule. +// +// Word joiner: +// LB11: Do not break before or after Word joiner and related characters. +// ✓ × WJ +// ✓ WJ × +// +// Non-breaking characters: +// LB12: Do not break after NBSP and related characters. +// ✓ GL × +// LB12a: Do not break before NBSP and related characters, except after spaces and hyphens. +// ✓ [^SP BA HY] × GL +// +// Opening and closing: +// LB13: Do not break before ']' or '!' or '/', even after spaces. +// ✓ × CL +// ✓ × CP +// ✓ × EX +// ✓ × SY +// LB14: Do not break after '[', even after spaces. +// ~ OP × modified from OP SP* × just because it's simpler. It would be nice to address this.
+// LB15a: Do not break after an unresolved initial punctuation that lies at the start of the line, after a space, after opening punctuation, or after an unresolved quotation mark, even after spaces. +// ✗ Not implemented. Seemed too complex for little gain? +// LB15b: Do not break before an unresolved final punctuation that lies at the end of the line, before a space, before a prohibited break, or before an unresolved quotation mark, even after spaces. +// ✗ Not implemented. Seemed too complex for little gain? +// LB15c: Break before a decimal mark that follows a space, for instance, in 'subtract .5'. +// ~ SP ÷ IS modified from SP ÷ IS NU because this fits neatly with LB15d. +// LB15d: Otherwise, do not break before ';', ',', or '.', even after spaces. +// ✓ × IS +// LB16: Do not break between closing punctuation and a nonstarter (lb=NS), even with intervening spaces. +// ✗ Not implemented. Could be useful in the future, but its usefulness seemed limited to me. +// LB17: Do not break within '——', even with intervening spaces. +// ✗ Not implemented. Neither terminal applications nor code use em-dashes much anyway. +// +// Spaces: +// LB18: Break after spaces. +// ✗ Implemented because we didn't implement LB7. +// +// Special case rules: +// LB19: Do not break before non-initial unresolved quotation marks, such as ' ” ' or ' " ', nor after non-final unresolved quotation marks, such as ' “ ' or ' " '. +// ~ × QU modified from × [ QU - \p{Pi} ] +// ~ QU × modified from [ QU - \p{Pf} ] × +// We implement the Unicode 16.0 instead of 16.1 rules, because it's simpler and allows us to use a LUT. +// LB19a: Unless surrounded by East Asian characters, do not break either side of any unresolved quotation marks. +// ✗ [^$EastAsian] × QU +// ✗ × QU ( [^$EastAsian] | eot ) +// ✗ QU × [^$EastAsian] +// ✗ ( sot | [^$EastAsian] ) QU × +// Same as LB19. +// LB20: Break before and after unresolved CB. +// ✗ We break by default.
Unicode inline objects are super irrelevant in a terminal in either case. +// LB20a: Do not break after a word-initial hyphen. +// ✗ Not implemented. Seemed not worth the hassle as the window will almost always be >1 char wide. +// LB21: Do not break before hyphen-minus, other hyphens, fixed-width spaces, small kana, and other non-starters, or after acute accents. +// ✓ × BA +// ✓ × HY +// ✓ × NS +// ✓ BB × +// LB21a: Do not break after the hyphen in Hebrew + Hyphen + non-Hebrew. +// ✗ Not implemented. Perhaps in the future. +// LB21b: Do not break between Solidus and Hebrew letters. +// ✗ Not implemented. Perhaps in the future. +// LB22: Do not break before ellipses. +// ✓ × IN +// +// Numbers: +// LB23: Do not break between digits and letters. +// ✓ (AL | HL) × NU +// ✓ NU × (AL | HL) +// LB23a: Do not break between numeric prefixes and ideographs, or between ideographs and numeric postfixes. +// ✓ PR × (ID | EB | EM) +// ✓ (ID | EB | EM) × PO +// LB24: Do not break between numeric prefix/postfix and letters, or between letters and prefix/postfix. +// ✓ (PR | PO) × (AL | HL) +// ✓ (AL | HL) × (PR | PO) +// LB25: Do not break numbers: +// ~ CL × PO modified from NU ( SY | IS )* CL × PO +// ~ CP × PO modified from NU ( SY | IS )* CP × PO +// ~ CL × PR modified from NU ( SY | IS )* CL × PR +// ~ CP × PR modified from NU ( SY | IS )* CP × PR +// ~ ( NU | SY | IS ) × PO modified from NU ( SY | IS )* × PO +// ~ ( NU | SY | IS ) × PR modified from NU ( SY | IS )* × PR +// ~ PO × OP modified from PO × OP NU +// ~ PO × OP modified from PO × OP IS NU +// ✓ PO × NU +// ~ PR × OP modified from PR × OP NU +// ~ PR × OP modified from PR × OP IS NU +// ✓ PR × NU +// ✓ HY × NU +// ✓ IS × NU +// ~ ( NU | SY | IS ) × NU modified from NU ( SY | IS )* × NU +// Most were simplified because the cases this additionally allows don't matter much here. +// +// Korean syllable blocks +// LB26: Do not break a Korean syllable. +// ✗ Our ucd_* functions never break within grapheme clusters. 
+// LB27: Treat a Korean Syllable Block the same as ID. +// ✗ Our ucd_* functions never break within grapheme clusters. +// +// Finally, join alphabetic letters into words and break everything else. +// LB28: Do not break between alphabetics ("at"). +// ✓ (AL | HL) × (AL | HL) +// LB28a: Do not break inside the orthographic syllables of Brahmic scripts. +// ✗ Our ucd_* functions never break within grapheme clusters. +// LB29: Do not break between numeric punctuation and alphabetics ("e.g."). +// ✓ IS × (AL | HL) +// LB30: Do not break between letters, numbers, or ordinary symbols and opening or closing parentheses. +// ✓ (AL | HL | NU) × [OP-$EastAsian] +// ✓ [CP-$EastAsian] × (AL | HL | NU) +// LB30a: Break between two regional indicator symbols if and only if there are an even number of regional indicators preceding the position of the break. +// ✗ Our ucd_* functions never break within grapheme clusters. +// LB30b: Do not break between an emoji base (or potential emoji) and an emoji modifier. +// ✗ Our ucd_* functions never break within grapheme clusters. +// LB31: Break everywhere else. +// ✗ Our default behavior. 
+#[rustfmt::skip] +pub const JOIN_RULES_LINE_BREAK: [[i32; 24]; 24] = [ + /* ↓ leading → trailing codepoint */ + /* | Other | WordJoiner | ZeroWidthSpace | Glue | Space | BreakAfter | BreakBefore | Hyphen | ClosePunctuation | CloseParenthesis_EA | CloseParenthesis_NotEA | Exclamation | Inseparable | Nonstarter | OpenPunctuation_EA | OpenPunctuation_NotEA | Quotation | InfixNumericSeparator | Numeric | PostfixNumeric | PrefixNumeric | SymbolsAllowingBreakAfter | Alphabetic | Ideographic | */ + /* Other | */ [X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, X /* | */, X /* | */, 1 /* | */, 1 /* | */, X /* | */, X /* | */, X /* | */, 1 /* | */, X /* | */, X /* | */], + /* WordJoiner | */ [1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */], + /* ZeroWidthSpace | */ [X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */, X /* | */], + /* Glue | */ [1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */], + /* Space | */ [X /* | */, 1 /* | */, X /* | */, X /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, X /* | */, X /* | */, 1 /* | */, X /* | */, X /* | */, X /* | */, X /* | */, 1 /* | */, X /* | */, X /* | */], + /* BreakAfter | */ [X /* | */, 1 /* | */, X /* | */, X /* | */, X /* | */, 1 
/* | */, X /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, X /* | */, X /* | */, 1 /* | */, 1 /* | */, X /* | */, X /* | */, X /* | */, 1 /* | */, X /* | */, X /* | */], + /* BreakBefore | */ [1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */], + /* Hyphen | */ [X /* | */, 1 /* | */, X /* | */, X /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, X /* | */, X /* | */, 1 /* | */, 1 /* | */, 1 /* | */, X /* | */, X /* | */, 1 /* | */, X /* | */, X /* | */], + /* ClosePunctuation | */ [X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, X /* | */, X /* | */, 1 /* | */, 1 /* | */, X /* | */, 1 /* | */, 1 /* | */, 1 /* | */, X /* | */, X /* | */], + /* CloseParenthesis_EA | */ [X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, X /* | */, X /* | */, 1 /* | */, 1 /* | */, X /* | */, 1 /* | */, 1 /* | */, 1 /* | */, X /* | */, X /* | */], + /* CloseParenthesis_NotEA | */ [X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, X /* | */, X /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, X /* | */], + /* Exclamation | */ [X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, X /* | */, X /* | */, 1 /* | */, 1 /* | */, X /* | */, X /* | */, X /* | */, 1 /* | */, X /* | */, X /* | */], + /* Inseparable | */ [X /* | */, 1 /* | */, 
X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, X /* | */, X /* | */, 1 /* | */, 1 /* | */, X /* | */, X /* | */, X /* | */, 1 /* | */, X /* | */, X /* | */], + /* Nonstarter | */ [X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, X /* | */, X /* | */, 1 /* | */, 1 /* | */, X /* | */, X /* | */, X /* | */, 1 /* | */, X /* | */, X /* | */], + /* OpenPunctuation_EA | */ [1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */], + /* OpenPunctuation_NotEA | */ [1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */], + /* Quotation | */ [1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */], + /* InfixNumericSeparator | */ [X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, X /* | */, X /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, X /* | */], + /* Numeric | */ [X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, X /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, X /* | */], + /* 
PostfixNumeric | */ [X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, X /* | */, X /* | */, 1 /* | */, 1 /* | */, X /* | */], + /* PrefixNumeric | */ [X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, X /* | */, X /* | */, 1 /* | */, 1 /* | */, 1 /* | */], + /* SymbolsAllowingBreakAfter | */ [X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, X /* | */, X /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, X /* | */, X /* | */], + /* Alphabetic | */ [X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, X /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, X /* | */], + /* Ideographic | */ [X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, 1 /* | */, X /* | */, X /* | */, 1 /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, 1 /* | */, X /* | */, X /* | */], +];