From 67e7e470b25b29b5b7be6a2da9c1163c862ee6ba Mon Sep 17 00:00:00 2001 From: dylan-sutton-chavez Date: Sat, 9 May 2026 16:40:06 -0600 Subject: [PATCH 01/31] hoist deps to workspace.dependencies --- Cargo.toml | 13 ++++++++ compiler/Cargo.toml | 12 ++++---- edge-pdk/macros/Cargo.toml | 6 ++-- examples/slugify-mod/Cargo.toml | 2 +- workspace-dependencies-table.txt | 51 ++++++++++++++++++++++++++++++++ 5 files changed, 74 insertions(+), 10 deletions(-) create mode 100644 workspace-dependencies-table.txt diff --git a/Cargo.toml b/Cargo.toml index 886831c..ca4fe87 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,6 +11,19 @@ edition = "2024" license = "MIT OR Apache-2.0" repository = "https://github.com/dylan-sutton-chavez/edge-python/" +# Single source of truth for third-party versions. Members opt in with +# `dep = { workspace = true }` so a bump touches one line workspace-wide +# and the lockfile cannot grow accidental version splits. +[workspace.dependencies] +hashbrown = { version = "0.17", default-features = false } +itoa = "1" +lol_alloc = "0.4" +proc-macro2 = "1" +quote = "1" +syn = { version = "2", features = ["full"] } +serde = { version = "1", features = ["derive"] } +serde_json = "1" + [profile.release] opt-level = "z" lto = true diff --git a/compiler/Cargo.toml b/compiler/Cargo.toml index d55fa0a..eecb416 100644 --- a/compiler/Cargo.toml +++ b/compiler/Cargo.toml @@ -20,18 +20,18 @@ name = "tests" path = "tests/main.rs" [dependencies] -hashbrown = { version = "0.17", default-features = false } -itoa = "1" +hashbrown = { workspace = true } +itoa = { workspace = true } # WASM-only global allocator. Pulled into wasm32 builds automatically; absent # on host builds (where std's allocator is fine and `compiler.wasm` is just a # library artifact, not the runtime target). [target.'cfg(target_arch = "wasm32")'.dependencies] -lol_alloc = "0.4" +lol_alloc = { workspace = true } # Test-only deps for the JSON-driven test runner. 
None of these are pulled # into the release `compiler.wasm`. [dev-dependencies] -hashbrown = { version = "0.17", default-features = false, features = ["serde"] } -serde = { version = "1", features = ["derive"] } -serde_json = "1" +hashbrown = { workspace = true, features = ["serde"] } +serde = { workspace = true } +serde_json = { workspace = true } diff --git a/edge-pdk/macros/Cargo.toml b/edge-pdk/macros/Cargo.toml index f15cddf..df17577 100644 --- a/edge-pdk/macros/Cargo.toml +++ b/edge-pdk/macros/Cargo.toml @@ -12,6 +12,6 @@ test = false doctest = false [dependencies] -proc-macro2 = "1" -quote = "1" -syn = { version = "2", features = ["full"] } +proc-macro2 = { workspace = true } +quote = { workspace = true } +syn = { workspace = true } diff --git a/examples/slugify-mod/Cargo.toml b/examples/slugify-mod/Cargo.toml index 1195c52..27a3ec0 100644 --- a/examples/slugify-mod/Cargo.toml +++ b/examples/slugify-mod/Cargo.toml @@ -12,4 +12,4 @@ doctest = false [dependencies] edge-pdk = { path = "../../edge-pdk" } -lol_alloc = "0.4" +lol_alloc = { workspace = true } diff --git a/workspace-dependencies-table.txt b/workspace-dependencies-table.txt new file mode 100644 index 0000000..f5e9793 --- /dev/null +++ b/workspace-dependencies-table.txt @@ -0,0 +1,51 @@ +Branch: workspace-dependencies-table +Tech debt: D54 — no [workspace.dependencies] table; lol_alloc duplicated. + +================================================================================ +Commit 1: hoist third-party versions to workspace.dependencies +================================================================================ +Files: + - Cargo.toml (root) + - compiler/Cargo.toml + - edge-pdk/macros/Cargo.toml + - examples/slugify-mod/Cargo.toml + +What was wrong: + Each crate declared its own version of every third-party dep. The + immediate concrete duplication was lol_alloc 0.4, listed in both + compiler/Cargo.toml and examples/slugify-mod/Cargo.toml. Two real + failure modes: + + 1. 
A bump applied to one Cargo.toml but missed in the other lets + cargo resolve two copies of lol_alloc into the same lockfile, so + every workspace build pulls both versions. + 2. Workspace-wide bumps (security, MSRV, feature) require N edits + and N reviews instead of one. + + hashbrown, itoa, serde, serde_json, proc-macro2, quote, syn each + exist in only one crate today, but the workspace is a published-shape + artifact (cdylib + rlib + PDK). Any new crate that needed e.g. itoa + would need to either re-declare the version or coincidentally pick + the same one — exactly the situation lol_alloc fell into. + +Fix: + Added a [workspace.dependencies] table to the root Cargo.toml with + every shared third-party dep: + hashbrown (default-features = false) + itoa, lol_alloc, proc-macro2, quote, serde_json + syn (features = ["full"]) + serde (features = ["derive"]) + + Each member crate now uses `dep = { workspace = true }`, with the + one exception that needs an extra feature on top + (compiler dev-dependencies for hashbrown adds ["serde"] via + `{ workspace = true, features = ["serde"] }` — this is the + intended cargo pattern for additive-feature opt-in). + + Added a `#`-style comment in the root Cargo.toml explaining + the intent so a future reader knows where new shared deps go. + +Documentation: + No .md docs reference the workspace dependency layout. + +Build & tests: cargo build -p edge-python -> ok; cargo test -> 6/6 passed. 
From b77fb73a6df8fc996230d6405fed47b3ecc407ca Mon Sep 17 00:00:00 2001 From: dylan-sutton-chavez Date: Sat, 9 May 2026 19:39:36 -0600 Subject: [PATCH 02/31] move fx fstr sha256 to util module --- compiler/README.md | 7 +-- compiler/src/lib.rs | 13 +++-- compiler/src/main/resolver.rs | 2 +- compiler/src/modules/packages/manifest.rs | 2 +- compiler/src/modules/packages/mod.rs | 2 +- compiler/src/modules/parser/expr.rs | 2 +- compiler/src/modules/parser/imports.rs | 4 +- compiler/src/modules/parser/literals.rs | 4 +- compiler/src/modules/parser/mod.rs | 2 +- compiler/src/modules/parser/types.rs | 2 +- compiler/src/modules/vm/builtins/attr.rs | 8 +-- compiler/src/modules/vm/builtins/container.rs | 2 +- compiler/src/modules/vm/builtins/identity.rs | 2 +- compiler/src/modules/vm/handlers/format.rs | 2 +- compiler/src/modules/vm/handlers/function.rs | 10 ++-- compiler/src/modules/vm/handlers/methods.rs | 14 ++--- .../modules/vm/handlers/methods_helpers.rs | 2 +- compiler/src/modules/vm/init.rs | 10 ++-- compiler/src/modules/vm/mod.rs | 6 +-- compiler/src/modules/vm/ops.rs | 4 +- compiler/src/modules/vm/types/mod.rs | 2 +- compiler/src/{modules => util}/fstr.rs | 6 +-- compiler/src/{modules => util}/fx.rs | 0 compiler/src/{modules => util}/sha256.rs | 0 compiler/tests/common.rs | 2 +- documentation/implementation/design.md | 7 +-- move-utils-to-util-mod.txt | 53 +++++++++++++++++++ 27 files changed, 116 insertions(+), 54 deletions(-) rename compiler/src/{modules => util}/fstr.rs (94%) rename compiler/src/{modules => util}/fx.rs (100%) rename compiler/src/{modules => util}/sha256.rs (100%) create mode 100644 move-utils-to-util-mod.txt diff --git a/compiler/README.md b/compiler/README.md index c4751ef..6cc01bf 100644 --- a/compiler/README.md +++ b/compiler/README.md @@ -90,10 +90,11 @@ Mark-and-sweep with roots: operand stack, with-stack, pending yields, event queu │ │ ├── abi_bridge.rs │ │ ├── resolver.rs │ │ └── errors.rs +│ ├── util +│ │ ├── fstr.rs +│ │ ├── fx.rs +│ │ 
└── sha256.rs │ └── modules -│ ├── fstr.rs -│ ├── fx.rs -│ ├── sha256.rs │ ├── lexer │ │ ├── mod.rs │ │ ├── scan.rs diff --git a/compiler/src/lib.rs b/compiler/src/lib.rs index 5c4ac88..e491648 100644 --- a/compiler/src/lib.rs +++ b/compiler/src/lib.rs @@ -8,12 +8,19 @@ pub mod abi; #[cfg(target_arch = "wasm32")] pub mod main; -pub mod modules { +/* Internal helpers shared across the compiler — not Edge Python language + modules. Kept separate from `modules/` (which contains lexer/parser/vm/ + packages — runtime components) so contributors don't mistake utility + code for built-in stdlib. */ +pub mod util { pub mod fx; + pub mod fstr; + pub mod sha256; +} + +pub mod modules { pub mod lexer; pub mod vm; pub mod parser; pub mod packages; - pub mod fstr; - pub mod sha256; } \ No newline at end of file diff --git a/compiler/src/main/resolver.rs b/compiler/src/main/resolver.rs index 9e8bdb2..777fd8e 100644 --- a/compiler/src/main/resolver.rs +++ b/compiler/src/main/resolver.rs @@ -1,6 +1,6 @@ use crate::modules::vm::types::{HeapPool, Val, VmErr}; use crate::modules::packages::{NativeBinding, Resolved, Resolver, parse_manifest, walk_up_dirs, dir_of, join_relative}; -use crate::modules::fx::FxHashSet; +use crate::util::fx::FxHashSet; use alloc::{boxed::Box, string::{String, ToString}, sync::Arc, vec::Vec}; use crate::s; diff --git a/compiler/src/modules/packages/manifest.rs b/compiler/src/modules/packages/manifest.rs index ec9ce20..214b84a 100644 --- a/compiler/src/modules/packages/manifest.rs +++ b/compiler/src/modules/packages/manifest.rs @@ -1,7 +1,7 @@ use alloc::string::{String, ToString}; use crate::s; -use crate::modules::fx::FxHashMap; +use crate::util::fx::FxHashMap; /* Parsed `packages.json`. `imports` maps bare names to specs; `extends` inherits another manifest's imports when a name isn't local. 
*/ #[derive(Clone)] diff --git a/compiler/src/modules/packages/mod.rs b/compiler/src/modules/packages/mod.rs index e55f3c1..a66ef45 100644 --- a/compiler/src/modules/packages/mod.rs +++ b/compiler/src/modules/packages/mod.rs @@ -65,7 +65,7 @@ pub fn parse_integrity(spec: &str) -> Result<(&str, Option<[u8; 32]>), String> { "sha256 fragment must be 64 hex chars in '", str spec, "'; got ", int hex.len() as i64)); } - let hash = crate::modules::sha256::hex_decode_32(hex).ok_or_else(|| s!( + let hash = crate::util::sha256::hex_decode_32(hex).ok_or_else(|| s!( "invalid hex in sha256 fragment of '", str spec, "'"))?; Ok((url, Some(hash))) } diff --git a/compiler/src/modules/parser/expr.rs b/compiler/src/modules/parser/expr.rs index 21f67c7..888ce53 100644 --- a/compiler/src/modules/parser/expr.rs +++ b/compiler/src/modules/parser/expr.rs @@ -387,7 +387,7 @@ impl<'src, I: Iterator> Parser<'src, I> { s.chunk.emit(OpCode::ReturnValue, 0); }); - let param_slots: crate::modules::fx::FxHashSet = params.iter().map(|p| s!(str p.trim_start_matches('*'), "_0")).collect(); + let param_slots: crate::util::fx::FxHashSet = params.iter().map(|p| s!(str p.trim_start_matches('*'), "_0")).collect(); for name in &body.names { if !param_slots.contains(name.as_str()) { self.chunk.push_name(name); diff --git a/compiler/src/modules/parser/imports.rs b/compiler/src/modules/parser/imports.rs index 7f12e43..5e064ec 100644 --- a/compiler/src/modules/parser/imports.rs +++ b/compiler/src/modules/parser/imports.rs @@ -8,8 +8,8 @@ use super::Parser; use super::types::{Diagnostic, ImportEntry, ImportKind, OpCode, SSAChunk, parse_string, ssa_strip}; use crate::modules::lexer::{Token, TokenType, lex}; use crate::modules::packages::{Resolved, binding_to_extern, parse_integrity}; -use crate::modules::sha256::{sha256, hex_encode}; -use crate::modules::fx::FxHashSet; +use crate::util::sha256::{sha256, hex_encode}; +use crate::util::fx::FxHashSet; use alloc::{string::{String, ToString}, vec::Vec}; diff --git 
a/compiler/src/modules/parser/literals.rs b/compiler/src/modules/parser/literals.rs index 9d54103..da3ef24 100644 --- a/compiler/src/modules/parser/literals.rs +++ b/compiler/src/modules/parser/literals.rs @@ -5,7 +5,7 @@ use super::types::builtin; use super::types::{OpCode, Value, SSAChunk, Instruction}; use crate::modules::lexer::{Token, TokenType, utf8_char_len}; -use crate::modules::fx::FxHashMap as HashMap; +use crate::util::fx::FxHashMap as HashMap; use alloc::{string::{String, ToString}, vec::Vec}; @@ -437,7 +437,7 @@ impl<'src, I: Iterator> Parser<'src, I> { let body = self.compile_body(¶ms); // Propagate free names to parent chunk so nested defs capture grandparent vars. - let param_slots: crate::modules::fx::FxHashSet = params.iter() + let param_slots: crate::util::fx::FxHashSet = params.iter() .map(|p| s!(str p.trim_start_matches(['*', '~']), "_0")).collect(); for name in &body.names { if !param_slots.contains(name.as_str()) { diff --git a/compiler/src/modules/parser/mod.rs b/compiler/src/modules/parser/mod.rs index 420c687..76edcef 100644 --- a/compiler/src/modules/parser/mod.rs +++ b/compiler/src/modules/parser/mod.rs @@ -10,7 +10,7 @@ pub use types::*; use crate::s; use crate::modules::lexer::{Token, TokenType}; -use crate::modules::fx::FxHashMap as HashMap; +use crate::util::fx::FxHashMap as HashMap; use crate::modules::packages::{Resolver, NoopResolver}; use alloc::{boxed::Box, string::{String, ToString}, vec::Vec}; diff --git a/compiler/src/modules/parser/types.rs b/compiler/src/modules/parser/types.rs index 396a288..977538c 100644 --- a/compiler/src/modules/parser/types.rs +++ b/compiler/src/modules/parser/types.rs @@ -1,5 +1,5 @@ use crate::s; -use crate::modules::fx::FxHashMap as HashMap; +use crate::util::fx::FxHashMap as HashMap; use crate::modules::vm::types::ExternFn; use alloc::{string::{String, ToString}, vec, vec::Vec}; diff --git a/compiler/src/modules/vm/builtins/attr.rs b/compiler/src/modules/vm/builtins/attr.rs index c4ed16a..5ff433d 
100644 --- a/compiler/src/modules/vm/builtins/attr.rs +++ b/compiler/src/modules/vm/builtins/attr.rs @@ -124,8 +124,8 @@ impl<'a> VM<'a> { &mut self, chunk: &crate::modules::parser::SSAChunk, slots: &[Val], ) -> Result<(), VmErr> { // Builtin/type/module pairs from self.globals, deduped to bare names. - let mut out: crate::modules::fx::FxHashMap = - crate::modules::fx::FxHashMap::default(); + let mut out: crate::util::fx::FxHashMap = + crate::util::fx::FxHashMap::default(); for (k, v) in self.globals.iter() { // Drop SSA-mirrors (`x_0`, `x_1`); keep canonical bare name. if let Some((bare, suf)) = k.rsplit_once('_') @@ -180,8 +180,8 @@ impl<'a> VM<'a> { &mut self, chunk: &crate::modules::parser::SSAChunk, slots: &[Val], ) -> Result<(), VmErr> { // Map bare-name -> (best version, val) so we keep only the latest. - let mut latest: crate::modules::fx::FxHashMap = - crate::modules::fx::FxHashMap::default(); + let mut latest: crate::util::fx::FxHashMap = + crate::util::fx::FxHashMap::default(); for (i, name) in chunk.names.iter().enumerate() { let v = match slots.get(i) { Some(v) if !v.is_undef() => *v, diff --git a/compiler/src/modules/vm/builtins/container.rs b/compiler/src/modules/vm/builtins/container.rs index ac6d46b..f8033f0 100644 --- a/compiler/src/modules/vm/builtins/container.rs +++ b/compiler/src/modules/vm/builtins/container.rs @@ -1,6 +1,6 @@ use core::cell::RefCell; use alloc::{rc::Rc, string::String, vec::Vec}; -use crate::modules::fx::FxHashSet as HashSet; +use crate::util::fx::FxHashSet as HashSet; use super::super::VM; use super::super::types::*; diff --git a/compiler/src/modules/vm/builtins/identity.rs b/compiler/src/modules/vm/builtins/identity.rs index 5b39437..4131969 100644 --- a/compiler/src/modules/vm/builtins/identity.rs +++ b/compiler/src/modules/vm/builtins/identity.rs @@ -33,7 +33,7 @@ impl<'a> VM<'a> { pub fn call_hash(&mut self) -> Result<(), VmErr> { use core::hash::{Hash, Hasher}; let o = self.pop()?; - let mut h = 
crate::modules::fx::FxHasher::default(); + let mut h = crate::util::fx::FxHasher::default(); if o.is_int() { o.as_int().hash(&mut h); } else if o.is_float() { o.as_float().to_bits().hash(&mut h); } else if o.is_bool() { o.as_bool().hash(&mut h); } diff --git a/compiler/src/modules/vm/handlers/format.rs b/compiler/src/modules/vm/handlers/format.rs index 2bda5e4..845db5c 100644 --- a/compiler/src/modules/vm/handlers/format.rs +++ b/compiler/src/modules/vm/handlers/format.rs @@ -426,7 +426,7 @@ pub fn display_inline(v: Val, heap: &HeapPool) -> String { } if v.is_bool() { return (if v.as_bool() { "True" } else { "False" }).to_string(); } if v.is_none() { return String::from("None"); } - if v.is_float() { return crate::modules::fstr::format_f64(v.as_float()); } + if v.is_float() { return crate::util::fstr::format_f64(v.as_float()); } if v.is_heap() && let HeapObj::Str(s) = heap.get(v) { return s.clone(); } /* Fall back to nothing — caller should use VM::display for full coverage. */ diff --git a/compiler/src/modules/vm/handlers/function.rs b/compiler/src/modules/vm/handlers/function.rs index b469a30..2fe55bc 100644 --- a/compiler/src/modules/vm/handlers/function.rs +++ b/compiler/src/modules/vm/handlers/function.rs @@ -76,13 +76,13 @@ impl<'a> VM<'a> { let defaults = if n_defaults > 0 { self.pop_n(n_defaults)? } else { vec![] }; let (params, body, _, _) = self.functions[global]; - let param_names: crate::modules::fx::FxHashSet = params.iter().map(|p| s!(str p.trim_start_matches(['*', '~']), "_0")).collect(); + let param_names: crate::util::fx::FxHashSet = params.iter().map(|p| s!(str p.trim_start_matches(['*', '~']), "_0")).collect(); let mut captures: Vec<(usize, Val)> = Vec::new(); // Capture closure values once per canonical (coalesced) slot, skipping // names already bound as formal parameters. The body.names list is // typically <30, so a linear scan over chunk.names is competitive // with a HashMap and avoids a per-call monomorphization. 
- let mut seen_canonical: crate::modules::fx::FxHashSet = crate::modules::fx::FxHashSet::default(); + let mut seen_canonical: crate::util::fx::FxHashSet = crate::util::fx::FxHashSet::default(); for (bi, bname) in body.names.iter().enumerate() { if param_names.contains(bname.as_str()) { continue; } let canon = body.alias_groups.get(bi) @@ -340,12 +340,12 @@ impl<'a> VM<'a> { let callee_module = self.fn_module.get(fi).cloned().flatten(); let same_scope = caller_fi == callee_parent_fi && caller_module == callee_module; - let captured_set: crate::modules::fx::FxHashSet = if same_scope { - crate::modules::fx::FxHashSet::default() + let captured_set: crate::util::fx::FxHashSet = if same_scope { + crate::util::fx::FxHashSet::default() } else if let HeapObj::Func(_, _, captures) = self.heap.get(callee) { captures.iter().map(|(s, _)| *s).collect() } else { - crate::modules::fx::FxHashSet::default() + crate::util::fx::FxHashSet::default() }; for (si, &v) in slots.iter().enumerate() { if !v.is_undef() diff --git a/compiler/src/modules/vm/handlers/methods.rs b/compiler/src/modules/vm/handlers/methods.rs index 180a08b..022f6df 100644 --- a/compiler/src/modules/vm/handlers/methods.rs +++ b/compiler/src/modules/vm/handlers/methods.rs @@ -846,7 +846,7 @@ define_methods! { check_arity(&pos, 1, 1, "intersection takes 1 argument")?; let lhs = set_clone(vm, recv)?; let rhs_items = iter_to_vec(vm, pos[0])?; - let rhs: crate::modules::fx::FxHashSet = rhs_items.into_iter().collect(); + let rhs: crate::util::fx::FxHashSet = rhs_items.into_iter().collect(); let out: Vec = lhs.into_iter().filter(|v| rhs.contains(v)).collect(); vm.alloc_and_push_set(out) }), @@ -854,34 +854,34 @@ define_methods! 
{ check_arity(&pos, 1, 1, "difference takes 1 argument")?; let lhs = set_clone(vm, recv)?; let rhs_items = iter_to_vec(vm, pos[0])?; - let rhs: crate::modules::fx::FxHashSet = rhs_items.into_iter().collect(); + let rhs: crate::util::fx::FxHashSet = rhs_items.into_iter().collect(); let out: Vec = lhs.into_iter().filter(|v| !rhs.contains(v)).collect(); vm.alloc_and_push_set(out) }), (SetSymmetricDifference, "symmetric_difference", pure, |vm, recv, pos| { check_arity(&pos, 1, 1, "symmetric_difference takes 1 argument")?; - let lhs: crate::modules::fx::FxHashSet = set_clone(vm, recv)?.into_iter().collect(); - let rhs: crate::modules::fx::FxHashSet = iter_to_vec(vm, pos[0])?.into_iter().collect(); + let lhs: crate::util::fx::FxHashSet = set_clone(vm, recv)?.into_iter().collect(); + let rhs: crate::util::fx::FxHashSet = iter_to_vec(vm, pos[0])?.into_iter().collect(); let out: Vec = lhs.symmetric_difference(&rhs).copied().collect(); vm.alloc_and_push_set(out) }), (SetIsSubset, "issubset", pure, |vm, recv, pos| { check_arity(&pos, 1, 1, "issubset takes 1 argument")?; let lhs = set_clone(vm, recv)?; - let rhs: crate::modules::fx::FxHashSet = iter_to_vec(vm, pos[0])?.into_iter().collect(); + let rhs: crate::util::fx::FxHashSet = iter_to_vec(vm, pos[0])?.into_iter().collect(); vm.push(Val::bool(lhs.iter().all(|v| rhs.contains(v)))); Ok(()) }), (SetIsSuperset, "issuperset", pure, |vm, recv, pos| { check_arity(&pos, 1, 1, "issuperset takes 1 argument")?; - let lhs: crate::modules::fx::FxHashSet = set_clone(vm, recv)?.into_iter().collect(); + let lhs: crate::util::fx::FxHashSet = set_clone(vm, recv)?.into_iter().collect(); let rhs = iter_to_vec(vm, pos[0])?; vm.push(Val::bool(rhs.iter().all(|v| lhs.contains(v)))); Ok(()) }), (SetIsDisjoint, "isdisjoint", pure, |vm, recv, pos| { check_arity(&pos, 1, 1, "isdisjoint takes 1 argument")?; - let lhs: crate::modules::fx::FxHashSet = set_clone(vm, recv)?.into_iter().collect(); + let lhs: crate::util::fx::FxHashSet = set_clone(vm, 
recv)?.into_iter().collect(); let rhs = iter_to_vec(vm, pos[0])?; vm.push(Val::bool(!rhs.iter().any(|v| lhs.contains(v)))); Ok(()) diff --git a/compiler/src/modules/vm/handlers/methods_helpers.rs b/compiler/src/modules/vm/handlers/methods_helpers.rs index 2bc5267..7049180 100644 --- a/compiler/src/modules/vm/handlers/methods_helpers.rs +++ b/compiler/src/modules/vm/handlers/methods_helpers.rs @@ -96,7 +96,7 @@ pub(super) fn set_clone(vm: &VM, recv: Val) -> Result, VmErr> { (add, remove, discard, pop, clear, update). */ #[inline] pub(super) fn set_mut(vm: &mut VM, recv: Val, err: &'static str, f: F) -> Result -where F: FnOnce(&mut crate::modules::fx::FxHashSet) -> Result +where F: FnOnce(&mut crate::util::fx::FxHashSet) -> Result { match vm.heap.get_mut(recv) { HeapObj::Set(rc) => f(&mut rc.borrow_mut()), diff --git a/compiler/src/modules/vm/init.rs b/compiler/src/modules/vm/init.rs index a972734..4e66e02 100644 --- a/compiler/src/modules/vm/init.rs +++ b/compiler/src/modules/vm/init.rs @@ -12,8 +12,8 @@ use super::types::*; canonical slot; `seen` deduplicates so the latest value wins. */ fn collect_module_attrs(chunk: &SSAChunk, slots: &[Val]) -> Vec<(String, Val)> { let mut attrs: Vec<(String, Val)> = Vec::new(); - let mut seen: crate::modules::fx::FxHashSet = - crate::modules::fx::FxHashSet::default(); + let mut seen: crate::util::fx::FxHashSet = + crate::util::fx::FxHashSet::default(); for ins in &chunk.instructions { if !matches!(ins.opcode, OpCode::StoreName) { continue; } let Some(name) = chunk.names.get(ins.operand as usize) else { continue; }; @@ -97,8 +97,8 @@ impl<'a> VM<'a> { // user code dispatches. Topological order falls out of recursive // descent: a module's dependencies are seen + initialised before // its own top-level runs. 
- let mut in_progress: crate::modules::fx::FxHashSet = - crate::modules::fx::FxHashSet::default(); + let mut in_progress: crate::util::fx::FxHashSet = + crate::util::fx::FxHashSet::default(); self.init_modules(self.chunk, &mut in_progress)?; let mut slots = self.fill_builtins(&self.chunk.names); self.exec(self.chunk, &mut slots) @@ -114,7 +114,7 @@ impl<'a> VM<'a> { fn init_modules( &mut self, chunk: &SSAChunk, - in_progress: &mut crate::modules::fx::FxHashSet, + in_progress: &mut crate::util::fx::FxHashSet, ) -> Result<(), VmErr> { for entry in &chunk.imports { if self.module_table.contains_key(&entry.spec) { continue; } diff --git a/compiler/src/modules/vm/mod.rs b/compiler/src/modules/vm/mod.rs index 63e23c1..75bb63d 100644 --- a/compiler/src/modules/vm/mod.rs +++ b/compiler/src/modules/vm/mod.rs @@ -12,7 +12,7 @@ mod init; use crate::s; use crate::modules::parser::{SSAChunk, BUILTIN_TYPES}; -use crate::modules::fx::FxHashMap as HashMap; +use crate::util::fx::FxHashMap as HashMap; pub use types::{Val, HeapObj, HeapPool, VmErr, Limits}; @@ -230,7 +230,7 @@ impl<'a> VM<'a> { // True iff the body references names not in params/builtins/captures. 
vm.needs_caller_slots = (0..vm.functions.len()).map(|fi| { let (params, body, _, _) = vm.functions[fi]; - let param_names: crate::modules::fx::FxHashSet<&str> = params.iter() + let param_names: crate::util::fx::FxHashSet<&str> = params.iter() .map(|p| p.trim_start_matches(['*', '~'])).collect(); body.names.iter().any(|n| { let base = crate::modules::parser::ssa_strip(n); @@ -255,7 +255,7 @@ impl<'a> VM<'a> { vm.body_free_loads = (0..vm.functions.len()).map(|fi| { let (_, body, _, _) = vm.functions[fi]; let param_bm = &vm.is_param_slot[fi]; - let mut written: crate::modules::fx::FxHashSet = crate::modules::fx::FxHashSet::default(); + let mut written: crate::util::fx::FxHashSet = crate::util::fx::FxHashSet::default(); for ins in &body.instructions { if matches!(ins.opcode, crate::modules::parser::OpCode::StoreName | crate::modules::parser::OpCode::Phi) { written.insert(ins.operand as usize); diff --git a/compiler/src/modules/vm/ops.rs b/compiler/src/modules/vm/ops.rs index 8aa2141..bb9aef8 100644 --- a/compiler/src/modules/vm/ops.rs +++ b/compiler/src/modules/vm/ops.rs @@ -189,7 +189,7 @@ impl<'a> VM<'a> { if !(Val::INT_MIN..=Val::INT_MAX).contains(&i) { return b.format(i).into(); } let mut s = String::new(); s.push_str(b.format(i)); s.push_str(".0"); return s; } - return crate::modules::fstr::format_f64(f); + return crate::util::fstr::format_f64(f); } if v.is_true() { return "True".into(); } if v.is_false() { return "False".into(); } @@ -360,7 +360,7 @@ impl<'a> VM<'a> { push. Dedups via HashSet's bit-eq, consistent with how literals and method results are stored. 
*/ fn alloc_set_value(&mut self, items: Vec) -> Result { - let mut s = crate::modules::fx::FxHashSet::default(); + let mut s = crate::util::fx::FxHashSet::default(); for v in items { s.insert(v); } self.heap.alloc(HeapObj::Set(Rc::new(RefCell::new(s)))) } diff --git a/compiler/src/modules/vm/types/mod.rs b/compiler/src/modules/vm/types/mod.rs index 7553573..ccf4db6 100644 --- a/compiler/src/modules/vm/types/mod.rs +++ b/compiler/src/modules/vm/types/mod.rs @@ -1,6 +1,6 @@ use alloc::{rc::Rc, string::String, vec, vec::Vec}; use core::cell::RefCell; -use crate::modules::fx::{FxHashMap as HashMap, FxHashSet as HashSet}; +use crate::util::fx::{FxHashMap as HashMap, FxHashSet as HashSet}; pub mod coro; pub mod eq; diff --git a/compiler/src/modules/fstr.rs b/compiler/src/util/fstr.rs similarity index 94% rename from compiler/src/modules/fstr.rs rename to compiler/src/util/fstr.rs index 8a5b099..813fd29 100644 --- a/compiler/src/modules/fstr.rs +++ b/compiler/src/util/fstr.rs @@ -52,7 +52,7 @@ macro_rules! push { ($s:ident, $v:literal) => { $s.push_str($v); }; ($s:ident, str $v:expr) => { $s.push_str($v); }; ($s:ident, int $v:expr) => {{ let mut b = itoa::Buffer::new(); $s.push_str(b.format($v)); }}; - ($s:ident, float $v:expr) => { $s.push_str(&$crate::modules::fstr::format_f64($v)); }; + ($s:ident, float $v:expr) => { $s.push_str(&$crate::util::fstr::format_f64($v)); }; ($s:ident, char $v:expr) => { $s.push($v); }; ($s:ident, bool $v:expr) => { $s.push_str(if $v { "true" } else { "false" }); }; } @@ -63,7 +63,7 @@ macro_rules! s { (@b $s:ident; $l:literal $(, $($r:tt)*)?) => { $s.push_str($l); $($crate::s!(@b $s; $($r)*);)? }; (@b $s:ident; str $v:expr $(, $($r:tt)*)?) => { $s.push_str($v); $($crate::s!(@b $s; $($r)*);)? }; (@b $s:ident; int $v:expr $(, $($r:tt)*)?) => {{ let mut _b = itoa::Buffer::new(); $s.push_str(_b.format($v)); $($crate::s!(@b $s; $($r)*);)? }}; - (@b $s:ident; float $v:expr $(, $($r:tt)*)?) 
=> { $s.push_str(&$crate::modules::fstr::format_f64($v)); $($crate::s!(@b $s; $($r)*);)? }; + (@b $s:ident; float $v:expr $(, $($r:tt)*)?) => { $s.push_str(&$crate::util::fstr::format_f64($v)); $($crate::s!(@b $s; $($r)*);)? }; (@b $s:ident; char $v:expr $(, $($r:tt)*)?) => { $s.push($v); $($crate::s!(@b $s; $($r)*);)? }; (@b $s:ident; bool $v:expr $(, $($r:tt)*)?) => { $s.push_str(if $v { "true" } else { "false" }); $($crate::s!(@b $s; $($r)*);)? }; (cap: $c:expr; $($t:tt)*) => {{ let mut _s = alloc::string::String::with_capacity($c); $crate::s!(@b _s; $($t)*); _s }}; @@ -102,5 +102,5 @@ impl From for alloc::string::String { fn from(e: E) -> Self { e.message() } } #[macro_export] macro_rules! err { - ($($t:tt)*) => { $crate::modules::fstr::E::Custom { msg: $crate::s!($($t)*) } }; + ($($t:tt)*) => { $crate::util::fstr::E::Custom { msg: $crate::s!($($t)*) } }; } diff --git a/compiler/src/modules/fx.rs b/compiler/src/util/fx.rs similarity index 100% rename from compiler/src/modules/fx.rs rename to compiler/src/util/fx.rs diff --git a/compiler/src/modules/sha256.rs b/compiler/src/util/sha256.rs similarity index 100% rename from compiler/src/modules/sha256.rs rename to compiler/src/util/sha256.rs diff --git a/compiler/tests/common.rs b/compiler/tests/common.rs index 32b99c8..23297b4 100644 --- a/compiler/tests/common.rs +++ b/compiler/tests/common.rs @@ -19,7 +19,7 @@ use std::cell::RefCell; use std::collections::{HashMap, HashSet}; use std::rc::Rc; -use compiler_lib::modules::fx::FxHashMap; +use compiler_lib::util::fx::FxHashMap; use compiler_lib::modules::packages::{ NativeBinding, Resolved, Resolver, Manifest, walk_up_dirs, dir_of, join_relative, diff --git a/documentation/implementation/design.md b/documentation/implementation/design.md index ee385f7..c41a613 100644 --- a/documentation/implementation/design.md +++ b/documentation/implementation/design.md @@ -88,10 +88,11 @@ compiler/src/ │ ├── abi_bridge.rs # host_edge_op + dispatch_* │ ├── resolver.rs # walk-up 
packages.json + native bridge closure │ └── errors.rs + ├── util/ # Internal helpers shared across the compiler (not stdlib) + │ ├── fstr.rs # numeric formatter + s!/push!/err! string macros + │ ├── fx.rs # FxHasher + per-map seeded FxBuildHasher + │ └── sha256.rs # in-tree FIPS 180-4 SHA-256 (used by integrity) └── modules/ - ├── fstr.rs # numeric formatter + s!/push!/err! string macros - ├── fx.rs # FxHasher + per-map seeded FxBuildHasher - ├── sha256.rs # in-tree FIPS 180-4 SHA-256 (used by integrity) ├── lexer/ │ ├── mod.rs │ ├── scan.rs diff --git a/move-utils-to-util-mod.txt b/move-utils-to-util-mod.txt new file mode 100644 index 0000000..c283c06 --- /dev/null +++ b/move-utils-to-util-mod.txt @@ -0,0 +1,53 @@ +Branch: move-utils-to-util-mod +Tech debt: D58 — fx, fstr, sha256 sat under modules/, conflating util helpers with the runtime/stdlib namespace. + +================================================================================ +Commit 1: move fx fstr sha256 from modules/ to util/ +================================================================================ +Files: + - compiler/src/lib.rs (split `modules { ... }` to add a `util { ... }` peer) + - compiler/src/util/fx.rs (renamed from modules/fx.rs) + - compiler/src/util/fstr.rs (renamed from modules/fstr.rs) + - compiler/src/util/sha256.rs (renamed from modules/sha256.rs) + - 21 .rs files: bulk-rewritten use paths + - compiler/README.md and documentation/implementation/design.md tree + diagrams updated to reflect the new layout. + +What was wrong: + `compiler/src/modules/` semantically should mean "runtime modules of + the Edge Python language" — components newcomers would expect to see + the lexer/parser/vm/packages/builtins inside. Instead it also held: + + fstr.rs - numeric formatter + s!/err! 
string macros (util) + fx.rs - FxHasher + FxBuildHasher (util) + sha256.rs - in-tree SHA-256 used by integrity checks (util) + + These have nothing to do with the Edge Python language; they are + internal compiler helpers. Mixing them under the same parent forced + newcomers to read each file to discover which were stdlib-shaped and + which were util-shaped. It would also block adding real runtime + modules (`math`, `os`, `sys` etc.) without a second naming round. + +Fix: + - Created `compiler/src/util/` as a peer to `modules/`. + - `git mv` for the three files; git tracks them as renames so blame + history is preserved. + - Updated `compiler/src/lib.rs`: + pub mod util { pub mod fx; pub mod fstr; pub mod sha256; } + pub mod modules { pub mod lexer; pub mod vm; pub mod parser; pub mod packages; } + - Bulk substitution across 21 .rs files (sed -i, no command + substitution): `crate::modules::{fx,fstr,sha256}::` → `crate::util::{...}::` + plus the `compiler_lib::modules::fx::` reference in tests/common.rs. + - The s!/err! macros' internal expansion path (`$crate::modules::fstr::format_f64`) + was caught by the same bulk substitution and now reads + `$crate::util::fstr::format_f64`, so call sites need no change. + - Added a /* ... */ doc comment in lib.rs explaining why util/ is + separated from modules/. + +Documentation: + Both the compiler/README.md and documentation/implementation/design.md + tree diagrams listed fstr.rs/fx.rs/sha256.rs under `modules/`. Updated + both to show them under the new `util/` peer. The line-count and + per-file annotations were preserved verbatim. + +Build & tests: cargo build -p edge-python -> ok; cargo test -> 6/6 passed. 
From 02c834f408a086329bed677e3935040619925b7f Mon Sep 17 00:00:00 2001 From: dylan-sutton-chavez Date: Sat, 9 May 2026 16:52:03 -0600 Subject: [PATCH 03/31] rename macros to edge-pdk-macros --- edge-pdk/Cargo.toml | 2 +- edge-pdk/macros/Cargo.toml | 4 +-- edge-pdk/src/lib.rs | 6 ++-- rename-macros-to-edge-pdk-macros.txt | 50 ++++++++++++++++++++++++++++ 4 files changed, 56 insertions(+), 6 deletions(-) create mode 100644 rename-macros-to-edge-pdk-macros.txt diff --git a/edge-pdk/Cargo.toml b/edge-pdk/Cargo.toml index 5aac071..274c400 100644 --- a/edge-pdk/Cargo.toml +++ b/edge-pdk/Cargo.toml @@ -11,4 +11,4 @@ crate-type = ["rlib"] doctest = false [dependencies] -macros = { path = "macros", version = "0.1.0" } +edge-pdk-macros = { path = "macros", version = "0.1.0" } diff --git a/edge-pdk/macros/Cargo.toml b/edge-pdk/macros/Cargo.toml index df17577..e11c2e9 100644 --- a/edge-pdk/macros/Cargo.toml +++ b/edge-pdk/macros/Cargo.toml @@ -1,10 +1,10 @@ [package] -name = "macros" +name = "edge-pdk-macros" version = "0.1.0" edition.workspace = true license.workspace = true repository.workspace = true -description = "Internal proc-macro for the `edge` crate. NOT FOR DIRECT USE — `edge` re-exports the `#[edge]` attribute. This crate exists separately because Rust requires proc-macros to live in a crate with `proc-macro = true`; nesting it at `edge/macros` makes the parent-child relationship explicit." +description = "Internal proc-macro for the `edge-pdk` crate. NOT FOR DIRECT USE — `edge-pdk` re-exports the `#[plugin_fn]` attribute. This crate exists separately because Rust requires proc-macros to live in a crate with `proc-macro = true`; nesting it at `edge-pdk/macros` makes the parent-child relationship explicit." [lib] proc-macro = true diff --git a/edge-pdk/src/lib.rs b/edge-pdk/src/lib.rs index 16dedfb..04fd43e 100644 --- a/edge-pdk/src/lib.rs +++ b/edge-pdk/src/lib.rs @@ -28,14 +28,14 @@ //! } //! ``` //! -//! 
The `#[plugin_fn]` attribute lives in the internal `macros` +//! The `#[plugin_fn]` attribute lives in the internal `edge-pdk-macros` //! sub-crate and is re-exported from here. #![cfg_attr(not(test), no_std)] extern crate alloc; -pub use macros::plugin_fn; +pub use edge_pdk_macros::plugin_fn; use alloc::{string::{String, ToString}, vec::Vec}; @@ -69,7 +69,7 @@ unsafe extern "C" { } /// Stash an error so the host sees it after the export returns 1. -/// Used by the `#[edge]` macro when a user function returns Err(_). +/// Used by the `#[plugin_fn]` macro when a user function returns Err(_). #[doc(hidden)] pub fn __stash_error(e: Error) { let kind = e.kind(); diff --git a/rename-macros-to-edge-pdk-macros.txt b/rename-macros-to-edge-pdk-macros.txt new file mode 100644 index 0000000..8014448 --- /dev/null +++ b/rename-macros-to-edge-pdk-macros.txt @@ -0,0 +1,50 @@ +Branch: rename-macros-to-edge-pdk-macros +Tech debt: D52 — proc-macro crate named `macros` is generic and not crates.io publishable. + +================================================================================ +Commit 1: rename macros crate to edge-pdk-macros +================================================================================ +Files: + - edge-pdk/macros/Cargo.toml (package name + description) + - edge-pdk/Cargo.toml (dependency rename) + - edge-pdk/src/lib.rs (re-export rename + module-doc + #[plugin_fn] attribute name) + - Cargo.lock (regenerated by cargo) + +What was wrong: + The proc-macro crate was published as `name = "macros"`. Three problems: + + 1. Unpublishable to crates.io: the name "macros" is taken and even if + it weren't, no curator would approve the name as descriptive of a + specific crate. + 2. `cargo tree` shows `macros 0.1.0` with no context — readers cannot + tell which parent crate it belongs to. + 3. 
The convention for proc-macro sibling crates is `-macros` + (serde_derive, tokio-macros, clap_derive) — drift from convention + forces every new contributor to learn the local exception. + + The Cargo.toml description also referenced "the `edge` crate" and the + "`#[edge]` attribute" — both wrong: the parent is `edge-pdk` and the + exported attribute is `#[plugin_fn]` (edge-pdk/src/lib.rs:38). The + description doc-comment in src/lib.rs:72 had the same `#[edge]` typo + on `__stash_error`. + +Fix: + - edge-pdk/macros/Cargo.toml: `name = "edge-pdk-macros"`. Description + rewritten to reference `edge-pdk` and `#[plugin_fn]` instead of the + fictional `edge` / `#[edge]`. + - edge-pdk/Cargo.toml: `edge-pdk-macros = { path = "macros", version = "0.1.0" }`. + - edge-pdk/src/lib.rs: `pub use edge_pdk_macros::plugin_fn;` and a + module-doc fix and the doc-comment on `__stash_error` updated to + `#[plugin_fn]`. + - Cargo.lock: cargo regenerated the entry; committed. + + Workspace member paths (`edge-pdk/macros`) did NOT need updating — + workspace members are addressed by directory, not crate name. + +Documentation: + documentation/implementation/design.md mentions "string macros" but + refers to the s!/push!/err! macros, not the crate name. No doc + updates required for the rename. Examples directory contains no + references to the old `macros` name. + +Tests: cargo test -> 6/6 passed. 
From 04359f4d455fc1929f1f666ad15a60f033f95e23 Mon Sep 17 00:00:00 2001 From: dylan-sutton-chavez Date: Sat, 9 May 2026 19:42:26 -0600 Subject: [PATCH 04/31] move pdk internals to hidden submodule --- edge-pdk/macros/src/lib.rs | 8 ++--- edge-pdk/src/lib.rs | 53 +++++++++++++++------------ hide-pdk-internals-submodule.txt | 62 ++++++++++++++++++++++++++++++++ 3 files changed, 97 insertions(+), 26 deletions(-) create mode 100644 hide-pdk-internals-submodule.txt diff --git a/edge-pdk/macros/src/lib.rs b/edge-pdk/macros/src/lib.rs index 02fe78f..240e1fa 100644 --- a/edge-pdk/macros/src/lib.rs +++ b/edge-pdk/macros/src/lib.rs @@ -73,7 +73,7 @@ pub fn plugin_fn(_attr: TokenStream, item: TokenStream) -> TokenStream { let h = unsafe { *argv.add(#i) }; let #name: #ty = match <#ty as ::edge_pdk::FromValue>::from_handle(h) { Ok(v) => v, - Err(e) => { ::edge_pdk::__stash_error(e); return 1; } + Err(e) => { ::edge_pdk::__internals::stash_error(e); return 1; } }; } }).collect(); @@ -83,7 +83,7 @@ pub fn plugin_fn(_attr: TokenStream, item: TokenStream) -> TokenStream { quote! 
{ match #impl_name(#(#arg_names),*) { Ok(v) => v, - Err(e) => { ::edge_pdk::__stash_error(e); return 1; } + Err(e) => { ::edge_pdk::__internals::stash_error(e); return 1; } } } } else { @@ -105,7 +105,7 @@ pub fn plugin_fn(_attr: TokenStream, item: TokenStream) -> TokenStream { out: *mut u32, ) -> i32 { if (argc as usize) != #argc_expected { - ::edge_pdk::__stash_error(::edge_pdk::Error::Type( + ::edge_pdk::__internals::stash_error(::edge_pdk::Error::Type( ::alloc::format!( "{} expects {} positional args, got {}", stringify!(#user_name), #argc_expected, argc))); @@ -121,7 +121,7 @@ pub fn plugin_fn(_attr: TokenStream, item: TokenStream) -> TokenStream { unsafe { *out = h.into_raw(); } 0 } - Err(e) => { ::edge_pdk::__stash_error(e); 1 } + Err(e) => { ::edge_pdk::__internals::stash_error(e); 1 } } } }; diff --git a/edge-pdk/src/lib.rs b/edge-pdk/src/lib.rs index 04fd43e..b420695 100644 --- a/edge-pdk/src/lib.rs +++ b/edge-pdk/src/lib.rs @@ -15,7 +15,8 @@ //! `edge_throw` / `edge_take_error`. //! * `FromValue` / `IntoValue` traits with primitive impls (`i64`, //! `f64`, `bool`, `String`, `&str`, `Option`, `Handle`). -//! * The `__edge_alloc` export the host shim needs for argv staging. +//! * The `__edge_alloc` export the host shim needs for argv staging +//! (lives in the hidden `__internals` module so glob imports stay clean). //! //! Author code: //! @@ -37,7 +38,7 @@ extern crate alloc; pub use edge_pdk_macros::plugin_fn; -use alloc::{string::{String, ToString}, vec::Vec}; +use alloc::{string::String, vec::Vec}; /* ---------- Wire imports --------------------------------------------- */ @@ -68,15 +69,6 @@ unsafe extern "C" { pub fn edge_throw(kind: u32, msg_ptr: *const u8, msg_len: u32); } -/// Stash an error so the host sees it after the export returns 1. -/// Used by the `#[plugin_fn]` macro when a user function returns Err(_). 
-#[doc(hidden)] -pub fn __stash_error(e: Error) { - let kind = e.kind(); - let msg = e.message().to_string(); - unsafe { edge_throw(kind, msg.as_ptr(), msg.len() as u32); } -} - /* ---------- Op codes & tags (must match bridge.rs spec) -------------- */ #[allow(non_camel_case_types)] @@ -100,17 +92,34 @@ pub mod tag { pub const BYTES: u32 = 4; } -/* ---------- Allocator the host calls to stage argv buffers ----------- */ - -/// Host-side argv stager. The shim allocates space in this module's -/// linear memory before invoking each export; the layout is -/// [u32; argc] for argv and a single u32 for `out`. We use a leak-free -/// bump scheme — every call lives entirely on the heap, so the leak is -/// reclaimed when the WASM instance is torn down. -#[unsafe(no_mangle)] -pub extern "C" fn __edge_alloc(size: u32) -> *mut u8 { - let v = alloc::vec![0u8; size as usize]; - alloc::boxed::Box::into_raw(v.into_boxed_slice()) as *mut u8 +/* ---------- Internals — macro contract surface, not user API --------- */ + +/* Sub-module so `use edge_pdk::*;` cannot pull these into a plugin + author's namespace. The `#[plugin_fn]` expansion qualifies the path + explicitly (`::edge_pdk::__internals::stash_error`), and `__edge_alloc` + stays a no_mangle WASM export regardless of Rust module nesting. */ +#[doc(hidden)] +pub mod __internals { + use super::Error; + use alloc::string::ToString; + + /* Used by #[plugin_fn] expansion when a user fn returns Err(_). */ + pub fn stash_error(e: Error) { + let kind = e.kind(); + let msg = e.message().to_string(); + unsafe { super::edge_throw(kind, msg.as_ptr(), msg.len() as u32); } + } + + /* Host-side argv stager. The shim allocates space in this module's + linear memory before invoking each export; the layout is + [u32; argc] for argv and a single u32 for `out`. We use a leak-free + bump scheme — every call lives entirely on the heap, so the leak is + reclaimed when the WASM instance is torn down. 
*/ + #[unsafe(no_mangle)] + pub extern "C" fn __edge_alloc(size: u32) -> *mut u8 { + let v = alloc::vec![0u8; size as usize]; + alloc::boxed::Box::into_raw(v.into_boxed_slice()) as *mut u8 + } } /* ---------- Errors --------------------------------------------------- */ diff --git a/hide-pdk-internals-submodule.txt b/hide-pdk-internals-submodule.txt new file mode 100644 index 0000000..9dc53d5 --- /dev/null +++ b/hide-pdk-internals-submodule.txt @@ -0,0 +1,62 @@ +Branch: hide-pdk-internals-submodule +Tech debt: D56 (full) — finishes the prelude work by removing __stash_error / __edge_alloc from the crate root, where `use edge_pdk::*;` could still glob them. + +================================================================================ +Commit 1: relocate pdk macro contract surface to __internals +================================================================================ +Files: + - edge-pdk/src/lib.rs + - edge-pdk/macros/src/lib.rs + +What was wrong: + After the prelude branch, plugin authors had a clean opt-in via + `use edge_pdk::prelude::*;`, but the legacy `use edge_pdk::*;` in + the wasm-abi doc and existing plugins still glob-imported every + crate-root pub item — including the two #[doc(hidden)] symbols that + exist purely as the proc-macro's contract surface: + - pub fn __stash_error(e: Error) + - pub extern "C" fn __edge_alloc(size: u32) -> *mut u8 + + #[doc(hidden)] only hides them from rustdoc; it does not exclude them + from a glob. So renaming or refactoring those internals remained a + silent breaking change for any plugin that touched them + (intentionally or not) through the glob. + +Fix: + Created a #[doc(hidden)] sub-module: + + pub mod __internals { + pub fn stash_error(e: Error) { ... } + #[unsafe(no_mangle)] + pub extern "C" fn __edge_alloc(...) -> *mut u8 { ... } + } + + `use edge_pdk::*;` does NOT glob-import items from sub-modules, so + these symbols are now invisible to plugin authors who use the glob. 
+ Renamed `__stash_error` to `stash_error` inside the sub-module — + the leading double-underscore was a "do not touch" warning that + becomes redundant once the symbol lives in `__internals`. + + __edge_alloc keeps its name and #[unsafe(no_mangle)] so the host shim's + WASM call site is unchanged. The Rust path moved, but the WASM + export symbol is identical. + + Updated the macro expansion in edge-pdk/macros/src/lib.rs from + `::edge_pdk::__stash_error(...)` to + `::edge_pdk::__internals::stash_error(...)` (4 sites). Removed the + now-unused `ToString` import from the lib.rs root (it lives inside + __internals). + + Verified the slugify-mod example still builds cleanly against the + wasm32-unknown-unknown target via the macro. + +Documentation: + reference/wasm-abi.md mentions `__edge_alloc` as the WASM export + name only; the symbol name is unchanged so the doc stays correct. + Updated the lib.rs module-doc bullet that listed __edge_alloc to + note where it now lives in source. + +Build & tests: + cargo build -p edge-pdk -p edge-pdk-macros -> ok (no warnings). + cargo build -p slugify-mod --target wasm32-unknown-unknown --release -> ok. + cargo test -> 6/6 passed.
From e455a8608d68da6ca3fea4983755922fc23f5f34 Mon Sep 17 00:00:00 2001 From: dylan-sutton-chavez Date: Sat, 9 May 2026 20:43:37 -0600 Subject: [PATCH 05/31] centralize nanbox constants in abi --- centralize-nanbox-constants.txt | 75 ++++++++++++++++++++++++++++ compiler/src/abi.rs | 45 ++++++++++------- compiler/src/modules/vm/types/mod.rs | 19 +++---- 3 files changed, 110 insertions(+), 29 deletions(-) create mode 100644 centralize-nanbox-constants.txt diff --git a/centralize-nanbox-constants.txt b/centralize-nanbox-constants.txt new file mode 100644 index 0000000..8411b0a --- /dev/null +++ b/centralize-nanbox-constants.txt @@ -0,0 +1,75 @@ +Branch: centralize-nanbox-constants +Tech debt: D20 — NaN-boxing constants existed in three independent copies (classify_encode, classify_decode, vm::types::Val), drifting was a silent ABI break. + +================================================================================ +Commit 1: lift NaN-box layout into abi::nan_box +================================================================================ +Files: + - compiler/src/abi.rs + - compiler/src/modules/vm/types/mod.rs + +What was wrong: + Three places defined the same constants: + + 1. classify_encode (abi.rs:188+) — inner consts: QNAN, TAG_NONE_BITS, + TAG_TRUE_BITS, TAG_FALSE_BITS, TAG_INT_BITS. + 2. classify_decode (abi.rs:247+) — inner consts: QNAN, SIGN, TAG_INT. + 3. vm::types::Val (mod.rs:64+) — module-level consts: QNAN, SIGN, + TAG_UNDEF, TAG_NONE, TAG_TRUE, TAG_FALSE, TAG_INT, TAG_HEAP. + + All three carried the same numeric values, but cargo had no way to + prove that. A bump in any one (e.g. moving the heap tag bit nibble) + would compile cleanly and silently corrupt round-tripping at the + wire boundary. The doc-comments openly admitted the duplication + ("must match host Val impl", "Same NaN-boxing constants as + classify_encode") without proposing a remedy. 
+ + The 48-bit int payload mask (0x0000_FFFF_FFFF_FFFF; 47 value bits plus sign) and the 28-bit + heap-index mask (0x0FFF_FFFF) were also embedded inline at the use + sites without symbolic names. + +Fix: + Added a sealed `pub mod nan_box` at the top of abi.rs holding every + NaN-boxing constant in a single source-of-truth: + + pub mod nan_box { + pub const QNAN: u64 = 0x7FFC_0000_0000_0000; + pub const SIGN: u64 = 0x8000_0000_0000_0000; + pub const TAG_UNDEF: u64 = QNAN; + pub const TAG_NONE: u64 = QNAN | 1; + pub const TAG_TRUE: u64 = QNAN | 2; + pub const TAG_FALSE: u64 = QNAN | 3; + pub const TAG_INT: u64 = QNAN | SIGN; + pub const TAG_HEAP: u64 = QNAN | 4; + pub const INT_PAYLOAD_MASK: u64 = 0x0000_FFFF_FFFF_FFFF; + } + + - classify_encode now `use nan_box::*;` and references TAG_NONE / + TAG_TRUE / TAG_FALSE / TAG_INT / INT_PAYLOAD_MASK directly. All four + of the locally-defined tag names lost their `_BITS` suffix because + the canonical names already convey "bit pattern" through context. + - classify_decode same treatment, plus the magic 0x0000_FFFF_FFFF_FFFF + now reads INT_PAYLOAD_MASK and the QNAN|1/2/3 literals collapse to + TAG_NONE/TAG_TRUE/TAG_FALSE. + - vm::types::Val replaced its eight module-level `const` declarations + with `use crate::abi::nan_box::{...}`. The two inline mask + occurrences inside `Val::int` and `Val::as_int` now name + INT_PAYLOAD_MASK explicitly. + - 28-bit heap index mask (0x0FFF_FFFF) left inline since it appears + once and is structurally bound to the `>> 4` shift in the same + expression. + + /* ... */ doc comment in the lexer/parser style on the new module + records the sealing rule (any layout change is an ABI bump, ties + into D22's version handshake). + + Numeric values are byte-identical to before. Tests prove the + layout still round-trips through the VM and the wire codec. + +Documentation: + README.md and design.md describe the NaN-box layout at the abstract + level (e.g. "Int = QNAN | SIGN | i47").
The descriptions remain + accurate; the file-path internals are not user-visible. No doc + updates required. + +Build & tests: cargo build -p edge-python -> ok; cargo test -> 6/6 passed. diff --git a/compiler/src/abi.rs b/compiler/src/abi.rs index b76dcba..3dca585 100644 --- a/compiler/src/abi.rs +++ b/compiler/src/abi.rs @@ -7,6 +7,23 @@ Sealed contract for modules. use alloc::{string::String, vec::Vec}; +/* Source-of-truth NaN-boxing layout. Both the wire codec below and + vm::types::Val import from here, so any change touches one site + instead of three. Reserved for the `Sealed contract — v1` set: a + layout change forces a wasm-abi version bump. */ +pub mod nan_box { + pub const QNAN: u64 = 0x7FFC_0000_0000_0000; + pub const SIGN: u64 = 0x8000_0000_0000_0000; + pub const TAG_UNDEF: u64 = QNAN; + pub const TAG_NONE: u64 = QNAN | 1; + pub const TAG_TRUE: u64 = QNAN | 2; + pub const TAG_FALSE: u64 = QNAN | 3; + pub const TAG_INT: u64 = QNAN | SIGN; + pub const TAG_HEAP: u64 = QNAN | 4; + /* 47-bit signed integer payload mask (two's-complement, sign bit at bit 47). */ + pub const INT_PAYLOAD_MASK: u64 = 0x0000_FFFF_FFFF_FFFF; +} + /* Op codes (sealed) */ #[allow(non_camel_case_types)] @@ -184,27 +201,22 @@ pub enum EncodeRequest<'a> { Invalid, } -// Maps (tag, bytes) to EncodeRequest. NaN-boxing layout is sealed here; changes require ABI bump. +// Maps (tag, bytes) to EncodeRequest. NaN-boxing layout is sealed in `nan_box`; changes require ABI bump. pub fn classify_encode(tag: u32, bytes: &[u8]) -> EncodeRequest<'_> { - /* NaN-boxing constants; must match host Val impl. 
*/ - const QNAN: u64 = 0x7FFC_0000_0000_0000; - const TAG_NONE_BITS: u64 = QNAN | 1; - const TAG_TRUE_BITS: u64 = QNAN | 2; - const TAG_FALSE_BITS: u64 = QNAN | 3; - const TAG_INT_BITS: u64 = QNAN | 0x8000_0000_0000_0000; + use nan_box::*; match Tag::from_u32(tag) { - Some(Tag::None) => EncodeRequest::Direct(TAG_NONE_BITS), + Some(Tag::None) => EncodeRequest::Direct(TAG_NONE), Some(Tag::Bool) => { let b = !bytes.is_empty() && bytes[0] != 0; - EncodeRequest::Direct(if b { TAG_TRUE_BITS } else { TAG_FALSE_BITS }) + EncodeRequest::Direct(if b { TAG_TRUE } else { TAG_FALSE }) } Some(Tag::Int) => { if bytes.len() != 8 { return EncodeRequest::Invalid; } let mut buf = [0u8; 8]; buf.copy_from_slice(bytes); let i = i64::from_le_bytes(buf); - EncodeRequest::Direct(TAG_INT_BITS | (i as u64 & 0x0000_FFFF_FFFF_FFFF)) + EncodeRequest::Direct(TAG_INT | (i as u64 & INT_PAYLOAD_MASK)) } Some(Tag::Float) => { if bytes.len() != 8 { return EncodeRequest::Invalid; } @@ -245,10 +257,7 @@ impl PrimitiveBytes { // Classifies Val bits into Primitive/Heap/Invalid; Heap means host must read from HeapPool. pub fn classify_decode(val_bits: u64) -> DecodeBits { - /* Same NaN-boxing constants as classify_encode. */ - const QNAN: u64 = 0x7FFC_0000_0000_0000; - const SIGN: u64 = 0x8000_0000_0000_0000; - const TAG_INT: u64 = QNAN | SIGN; + use nan_box::*; // Float: any non-QNAN-tagged pattern. if (val_bits & QNAN) != QNAN { @@ -259,7 +268,7 @@ pub fn classify_decode(val_bits: u64) -> DecodeBits { } // Int: QNAN|SIGN with payload. if (val_bits & (QNAN | SIGN)) == TAG_INT { - let raw = (val_bits & 0x0000_FFFF_FFFF_FFFF) as i64; + let raw = (val_bits & INT_PAYLOAD_MASK) as i64; let sign_extended = (raw << 16) >> 16; return DecodeBits::Primitive { tag: Tag::Int as u32, @@ -269,17 +278,17 @@ pub fn classify_decode(val_bits: u64) -> DecodeBits { // Singletons and heap handles. 
let lower = val_bits & 0xF; if (val_bits & QNAN) == QNAN && (val_bits & SIGN) == 0 { - if val_bits == QNAN | 1 { + if val_bits == TAG_NONE { return DecodeBits::Primitive { tag: Tag::None as u32, bytes: PrimitiveBytes::None, }; } - if val_bits == QNAN | 2 { + if val_bits == TAG_TRUE { return DecodeBits::Primitive { tag: Tag::Bool as u32, bytes: PrimitiveBytes::Bool(1), }; } - if val_bits == QNAN | 3 { + if val_bits == TAG_FALSE { return DecodeBits::Primitive { tag: Tag::Bool as u32, bytes: PrimitiveBytes::Bool(0), }; diff --git a/compiler/src/modules/vm/types/mod.rs b/compiler/src/modules/vm/types/mod.rs index ccf4db6..3fb3614 100644 --- a/compiler/src/modules/vm/types/mod.rs +++ b/compiler/src/modules/vm/types/mod.rs @@ -62,15 +62,12 @@ impl ExternFn { } /* NaN-boxed 8-byte value: int (47-bit), float, bool, None, undef, or heap idx. - Tags live in the QNAN bit pattern; payload bits decide the variant. */ -const QNAN: u64 = 0x7FFC_0000_0000_0000; -const SIGN: u64 = 0x8000_0000_0000_0000; -const TAG_UNDEF: u64 = QNAN; // payload all zero — distinct from None/True/False/Heap -const TAG_NONE: u64 = QNAN | 1; -const TAG_TRUE: u64 = QNAN | 2; -const TAG_FALSE: u64 = QNAN | 3; -const TAG_INT: u64 = QNAN | SIGN; -const TAG_HEAP: u64 = QNAN | 4; + Layout sealed in `crate::abi::nan_box`; re-imported here as the single + source of truth across the wire codec and the VM. 
*/ +use crate::abi::nan_box::{ + QNAN, SIGN, TAG_UNDEF, TAG_NONE, TAG_TRUE, TAG_FALSE, TAG_INT, TAG_HEAP, + INT_PAYLOAD_MASK, +}; #[derive(Clone, Copy, Debug)] pub struct Val(pub(crate) u64); @@ -112,7 +109,7 @@ impl Val { pub const INT_MAX: i64 = 0x0000_7FFF_FFFF_FFFF; pub const INT_MIN: i64 = -0x0000_8000_0000_0000; #[inline(always)] pub fn int(i: i64) -> Self { - Self(TAG_INT | (i as u64 & 0x0000_FFFF_FFFF_FFFF)) + Self(TAG_INT | (i as u64 & INT_PAYLOAD_MASK)) } #[inline(always)] pub fn int_checked(i: i64) -> Option { if !(Self::INT_MIN..=Self::INT_MAX).contains(&i) { None } else { Some(Self::int(i)) } @@ -141,7 +138,7 @@ impl Val { #[inline(always)] pub fn raw(&self) -> u64 { self.0 } #[inline(always)] pub fn from_raw(u: u64) -> Self { Self(u) } #[inline(always)] pub fn as_int(&self) -> i64 { - let raw = (self.0 & 0x0000_FFFF_FFFF_FFFF) as i64; + let raw = (self.0 & INT_PAYLOAD_MASK) as i64; (raw << 16) >> 16 } #[inline(always)] pub fn as_bool(&self) -> bool { self.0 == TAG_TRUE } From 1aa066e4af063f22182c55fd7d1f8a93619ccc75 Mon Sep 17 00:00:00 2001 From: dylan-sutton-chavez Date: Sat, 9 May 2026 15:23:18 -0600 Subject: [PATCH 06/31] extract read_src helper --- centralize-src-buffer-read.txt | 47 ++++++++++++++++++++++++++++++++++ compiler/src/main/exports.rs | 21 +++++++++------ 2 files changed, 60 insertions(+), 8 deletions(-) create mode 100644 centralize-src-buffer-read.txt diff --git a/centralize-src-buffer-read.txt b/centralize-src-buffer-read.txt new file mode 100644 index 0000000..90c55f5 --- /dev/null +++ b/centralize-src-buffer-read.txt @@ -0,0 +1,47 @@ +Branch: centralize-src-buffer-read +Tech debt: D17 / D62 — extract_imports and run duplicated SRC reading with divergent error handling. 
+ +================================================================================ +Commit 1: extract read_src helper for SRC buffer +================================================================================ +File: compiler/src/main/exports.rs + +What was wrong: + Both extern "C" entry points opened the host-owned SRC buffer with the + same five-line preamble: + + let len = len.min(SZ); + let src = match core::str::from_utf8(unsafe { + core::slice::from_raw_parts(core::ptr::addr_of!(SRC) as *const u8, len) + }) { + Ok(s) => s, + Err(_) => ... + }; + + The cap-and-validate logic was identical, but the error paths + diverged: extract_imports silently returned write_out(""), while run + formatted "input rejected: invalid utf-8 at byte N". Adding a third + caller would have spawned a third dialect of the same boundary check. + +Fix: + Introduced a single private helper: + + unsafe fn read_src(len: usize) -> Result<&'static str, Utf8Error> + + that performs the cap-and-validate once and returns the &'static str + view (SRC is `static`, so the slice's lifetime is genuinely 'static). + + Both callers now match on the Result and decide how to surface the + failure: extract_imports keeps its silent-empty contract, run keeps + its user-facing error. The behaviour is byte-identical to before; + only the duplication is gone. + + A new caller now has one place to look and one decision to make + ("silent or formatted error?"), not five duplicated lines to copy. + +Documentation: + No .md docs describe the boundary handling at this granularity (the + mentions of `run()` in design.md / language docs refer to Python's + builtin `run()`, not the WASM export). No doc updates required. + +Build & tests: cargo build -p edge-python -> ok; cargo test -> 6/6 passed. 
diff --git a/compiler/src/main/exports.rs b/compiler/src/main/exports.rs index 42dca41..f84a236 100644 --- a/compiler/src/main/exports.rs +++ b/compiler/src/main/exports.rs @@ -68,12 +68,20 @@ pub unsafe extern "C" fn reset_modules() { error_stash().clear(); } -#[unsafe(no_mangle)] -pub unsafe extern "C" fn extract_imports(len: usize) -> usize { +/* Reads up to SZ bytes from the host-owned SRC buffer and validates UTF-8. + `len` is capped so the slice never extends past the buffer; callers decide + how to surface a UTF-8 failure (silent vs. user-facing error). */ +unsafe fn read_src(len: usize) -> Result<&'static str, core::str::Utf8Error> { let len = len.min(SZ); - let src = match core::str::from_utf8(unsafe { + let bytes = unsafe { core::slice::from_raw_parts(core::ptr::addr_of!(SRC) as *const u8, len) - }) { + }; + core::str::from_utf8(bytes) +} + +#[unsafe(no_mangle)] +pub unsafe extern "C" fn extract_imports(len: usize) -> usize { + let src = match unsafe { read_src(len) } { Ok(s) => s, Err(_) => return unsafe { write_out("") }, }; @@ -84,10 +92,7 @@ pub unsafe extern "C" fn extract_imports(len: usize) -> usize { #[unsafe(no_mangle)] pub unsafe extern "C" fn run(len: usize) -> usize { - let len = len.min(SZ); - let src = match core::str::from_utf8(unsafe { - core::slice::from_raw_parts(core::ptr::addr_of!(SRC) as *const u8, len) - }) { + let src = match unsafe { read_src(len) } { Ok(s) => s, Err(e) => return unsafe { write_out(&s!("input rejected: invalid utf-8 at byte ", int e.valid_up_to())) From 6db64c869ce85953f1eef6b8346838f9363c801d Mon Sep 17 00:00:00 2001 From: dylan-sutton-chavez Date: Sat, 9 May 2026 20:47:32 -0600 Subject: [PATCH 07/31] extract native binding closure to abi_bridge --- compiler/src/main/abi_bridge.rs | 51 +++++++++++++++++++++-- compiler/src/main/resolver.rs | 59 ++++++++------------------- tighten-resolver-internals.txt | 72 +++++++++++++++++++++++++++++++++ 3 files changed, 135 insertions(+), 47 deletions(-) create mode 100644 
tighten-resolver-internals.txt diff --git a/compiler/src/main/abi_bridge.rs b/compiler/src/main/abi_bridge.rs index 2a2ced4..ec6e01d 100644 --- a/compiler/src/main/abi_bridge.rs +++ b/compiler/src/main/abi_bridge.rs @@ -1,12 +1,13 @@ -use crate::abi::{classify_decode, classify_encode, DecodeBits, EncodeRequest, Op, PrimitiveBytes, TAG_INVALID}; +use crate::abi::{classify_decode, classify_encode, DecodeBits, EncodeRequest, ErrorKind, Op, PrimitiveBytes, TAG_INVALID}; use crate::modules::vm::types::{HeapObj, Val, VmErr}; use crate::modules::vm::handlers::methods::{lookup_method, dispatch_method}; -use alloc::{rc::Rc, string::{String, ToString}, vec, vec::Vec}; +use crate::modules::packages::NativeBinding; +use alloc::{rc::Rc, string::{String, ToString}, sync::Arc, vec, vec::Vec}; use core::cell::RefCell; use crate::s; -use super::{error_stash, get_val, handles, put_val, with_recv, with_vm}; -use super::errors::stash_error; +use super::{error_stash, get_val, handles, host_call_native, put_val, with_recv, with_vm}; +use super::errors::{error_from_kind, stash_error}; // Universal dispatch. Returns 0 + handle in `*out_handle`, or 1 + stashed error. #[unsafe(no_mangle)] @@ -312,3 +313,45 @@ pub unsafe extern "C" fn host_edge_take_error(out_kind: *mut u32, dst: *mut u8, } bytes.len() as i32 } + +/* Builds a NativeBinding whose closure translates a VM CallExtern call + into the universal wire ABI: stage args as handles, invoke + `host_call_native(id, ...)`, drain status into Result, and + release every handle (including out_handle) before returning. + + Lives here — not in resolver.rs — because the body is pure ABI + marshalling. The resolver only needs to ask "give me a binding for + (name, id)" and forget about handle plumbing. `pure: false` because + any guest call may have side effects through the host. 
*/ +pub(super) fn make_native_binding(name: String, id: u32) -> NativeBinding { + let closure = move |_: &mut crate::modules::vm::types::HeapPool, args: &[Val]| -> Result { + /* 1. Register args as handles the guest will see. */ + let argv: Vec = args.iter().map(|v| put_val(*v)).collect(); + let mut out_handle: u32 = 0; + + /* 2. Call guest export through the host shim. */ + let status = unsafe { + host_call_native( + id, + argv.as_ptr(), argv.len() as u32, + &mut out_handle as *mut u32, + ) + }; + + /* 3. Translate status/out_handle into Result. Read result + BEFORE releasing — argv release frees the slots `result` may + reference if the guest returned one of its inputs. */ + if status != 0 { + for h in &argv { handles().release(*h); } + let (kind, msg) = error_stash().take() + .unwrap_or((ErrorKind::Runtime as u32, String::from("native call failed"))); + return Err(error_from_kind(kind, msg)); + } + let result = get_val(out_handle) + .ok_or(VmErr::Runtime("native returned invalid handle"))?; + for h in &argv { handles().release(*h); } + handles().release(out_handle); + Ok(result) + }; + NativeBinding { name, func: Arc::new(closure), pure: false } +} diff --git a/compiler/src/main/resolver.rs b/compiler/src/main/resolver.rs index 777fd8e..cb0b341 100644 --- a/compiler/src/main/resolver.rs +++ b/compiler/src/main/resolver.rs @@ -1,12 +1,15 @@ -use crate::modules::vm::types::{HeapPool, Val, VmErr}; use crate::modules::packages::{NativeBinding, Resolved, Resolver, parse_manifest, walk_up_dirs, dir_of, join_relative}; use crate::util::fx::FxHashSet; -use alloc::{boxed::Box, string::{String, ToString}, sync::Arc, vec::Vec}; +use alloc::{boxed::Box, string::{String, ToString}, vec::Vec}; use crate::s; -use super::{ModuleEntry, error_stash, get_val, handles, host_fetch_bytes, manifests, put_val, registry}; -use super::errors::error_from_kind; -use crate::abi::ErrorKind; +use super::{ModuleEntry, host_fetch_bytes, manifests, registry}; +use 
super::abi_bridge::make_native_binding; + +/* Hard cap on packages.json `extends` chain length. Prevents an attacker- + crafted manifest from looping the resolver indefinitely; 32 is well above + any sane real-world workspace depth. */ +const MAX_PACKAGES_HOPS: u32 = 32; pub(super) struct WasmHostResolver { pub(super) dir: String } @@ -47,8 +50,11 @@ impl WasmHostResolver { let mut search_dir = start_dir.to_string(); let mut hops: u32 = 0; loop { - if hops > 32 { - return Err(s!("packages.json walk-up exceeded 32 hops resolving '", str name, "'")); + if hops > MAX_PACKAGES_HOPS { + return Err(s!( + "packages.json walk-up exceeded ", + int MAX_PACKAGES_HOPS as i64, + " hops resolving '", str name, "'")); } hops += 1; @@ -111,42 +117,9 @@ impl WasmHostResolver { canonical: spec.to_string(), }), ModuleEntry::Native(funcs) => { - let bindings: Vec = funcs.iter().map(|(name, id)| { - let id = *id; - // Translate VM CallExtern into the universal ABI wire shape. - let closure = move |_: &mut HeapPool, args: &[Val]| -> Result - { - // 1. Register args as handles. - let argv: Vec = args.iter().map(|v| put_val(*v)).collect(); - let mut out_handle: u32 = 0; - - // 2. Call guest export through the host shim. - let status = unsafe { - super::host_call_native( - id, - argv.as_ptr(), argv.len() as u32, - &mut out_handle as *mut u32, - ) - }; - - // 3. Translate status/out_handle into Result. Read result BEFORE releasing — order matters. 
- if status != 0 { - for h in &argv { handles().release(*h); } - let (kind, msg) = error_stash().take().unwrap_or((ErrorKind::Runtime as u32, String::from("native call failed"))); - return Err(error_from_kind(kind, msg)); - } - let result = get_val(out_handle) - .ok_or(VmErr::Runtime("native returned invalid handle"))?; - for h in &argv { handles().release(*h); } - handles().release(out_handle); - Ok(result) - }; - NativeBinding { - name: name.clone(), - func: Arc::new(closure), - pure: false, - } - }).collect(); + let bindings: Vec = funcs.iter() + .map(|(name, id)| make_native_binding(name.clone(), *id)) + .collect(); Ok(Resolved::Native { bindings, canonical: spec.to_string(), diff --git a/tighten-resolver-internals.txt b/tighten-resolver-internals.txt new file mode 100644 index 0000000..ae081f7 --- /dev/null +++ b/tighten-resolver-internals.txt @@ -0,0 +1,72 @@ +Branch: tighten-resolver-internals +Tech debt: D60 — WasmHostResolver mixed package resolution with ABI marshalling and used a magic 32-hop constant inline. + +================================================================================ +Commit 1: pull native-binding closure into abi_bridge and name the hop limit +================================================================================ +Files: + - compiler/src/main/resolver.rs + - compiler/src/main/abi_bridge.rs + +What was wrong: + resolve_canonical (resolver.rs:113-149) carried a 27-line closure that + was pure ABI plumbing: stage Vals as handles, invoke + host_call_native via FFI, drain status into Result, release every + handle. Resolver had to import HeapPool / Val / VmErr / Arc / + put_val / get_val / handles / error_stash / error_from_kind / + ErrorKind / host_call_native — eleven symbols whose only purpose + was to make the closure compile. + + Mixing concerns meant any change to the wire-marshalling rules + (e.g. tightening release ordering, surfacing a different default + error kind) had to be made in the wrong file. 
Reviewers had to + read the resolver to audit ABI safety. + + Separately, the 32-hop walk-up cap was an inline magic number with + no name. The value 32 was hard-coded twice (the bare literal + in the if-guard and the "32 hops" text in the s! formatting), so a future + bump would silently desync. + +Fix: + Added `pub(super) fn make_native_binding(name: String, id: u32) -> NativeBinding` + to abi_bridge.rs. The function owns the closure body. resolver.rs's + resolve_canonical Native arm collapses to one line per binding: + + let bindings: Vec = funcs.iter() + .map(|(name, id)| make_native_binding(name.clone(), *id)) + .collect(); + + Resolver no longer references eleven symbols (HeapPool, Val, VmErr, Arc, get_val, + handles, put_val, error_stash, error_from_kind, ErrorKind, + host_call_native), and is now visibly a "find the right module" + module — the only unsafe FFI symbol it still touches is + host_fetch_bytes for manifest IO. + + Named the cap as `const MAX_PACKAGES_HOPS: u32 = 32;` and rewrote + the error message to interpolate the constant, so a bump touches + one site: + + s!("packages.json walk-up exceeded ", + int MAX_PACKAGES_HOPS as i64, " hops resolving '", str name, "'") + + Behaviour unchanged: the runtime cap is still 32, the message is + byte-identical to before. + + /* ... */ doc comment in the lexer/parser style records the + motivation for both the cap and the closure relocation. + + This addresses the resolver/ABI separation half of D60 and the + named-constant point. Two follow-ups remain in the cache: + - Replace `manifests()` Vec<(String, Manifest)> with FxHashMap for + O(1) lookup (touches mod.rs static type). + - Split the resolver into ManifestStore + ModuleRegistry + + WasmHostResolver as separate types. + Both are larger structural moves and stay scoped to a separate + branch. + +Documentation: + reference/imports.md mentions "32 hops" verbatim. The constant + still equals 32 and the runtime message still produces "32 hops", + so no doc updates required.
+ +Build & tests: cargo build -p edge-python -> ok; cargo test -> 6/6 passed. From a1ea0508936b088c010d8451bf57fa7704c9e8c3 Mon Sep 17 00:00:00 2001 From: dylan-sutton-chavez Date: Sat, 9 May 2026 16:56:37 -0600 Subject: [PATCH 08/31] add edge_pdk prelude module --- add-edge-pdk-prelude.txt | 52 ++++++++++++++++++++++++++++++++++++++++ edge-pdk/src/lib.rs | 10 ++++++++ 2 files changed, 62 insertions(+) create mode 100644 add-edge-pdk-prelude.txt diff --git a/add-edge-pdk-prelude.txt b/add-edge-pdk-prelude.txt new file mode 100644 index 0000000..5ae8665 --- /dev/null +++ b/add-edge-pdk-prelude.txt @@ -0,0 +1,52 @@ +Branch: add-edge-pdk-prelude +Tech debt: D56 — `use edge_pdk::*;` glob-imports #[doc(hidden)] symbols. + +================================================================================ +Commit 1: introduce edge_pdk::prelude +================================================================================ +File: edge-pdk/src/lib.rs + +What was wrong: + Plugin authors are expected to write `use edge_pdk::*;` (this is the + pattern in the wasm-abi doc and the slugify-mod example). The glob + brings in everything that's `pub` at the crate root, including the + two macro-internal symbols documented as #[doc(hidden)]: + + pub fn __stash_error(e: Error) // line 74 + pub extern "C" fn __edge_alloc(...) // line 111 + + These symbols exist because `#[plugin_fn]` expansion calls them; they + are not part of the user-facing API. But #[doc(hidden)] only hides + them from rustdoc — it does not exclude them from a glob import. So + every plugin author silently gets `__stash_error` and `__edge_alloc` + in their namespace, and any future rename of those internals is a + breaking change to plugins that don't use them. + +Fix: + Added a curated prelude module: + + pub mod prelude { + pub use crate::{plugin_fn, Handle, Value, Error, Result, + FromValue, IntoValue}; + } + + This is the standard Rust prelude pattern (cf. std::prelude). 
Plugin + authors can switch to `use edge_pdk::prelude::*;` to opt out of the + glob's noise. The legacy `use edge_pdk::*;` path is unchanged and + still works for existing plugins; the prelude is purely additive. + + A /* ... */ doc comment in the lexer/parser style explains the + intent so contributors know where to extend the surface. + + This addresses just the prelude half of D56. The companion fix — + moving __stash_error / __edge_alloc into a `__internals` submodule + so they cannot leak through any glob — is a breaking change for the + proc-macro and is left for a separate branch. + +Documentation: + reference/wasm-abi.md examples use `use edge_pdk::*;` which continues + to work unchanged. Recommending the prelude in those examples is a + pure migration suggestion, not a correctness fix; skipped to keep + the change strictly additive. + +Build & tests: cargo build -p edge-pdk -> ok; cargo test -> 6/6 passed. diff --git a/edge-pdk/src/lib.rs b/edge-pdk/src/lib.rs index b420695..9c8a556 100644 --- a/edge-pdk/src/lib.rs +++ b/edge-pdk/src/lib.rs @@ -38,6 +38,16 @@ extern crate alloc; pub use edge_pdk_macros::plugin_fn; +/* Curated public surface for plugin authors. Glob-importing the whole + crate exposes #[doc(hidden)] symbols (`__stash_error`, `__edge_alloc`) + which are part of the macro contract, not the user API. The prelude + re-exports just what `#[plugin_fn]` expansion needs and what most + plugins reach for: type wrappers, the attribute, the trait pair. + Recommended: `use edge_pdk::prelude::*;`. */ +pub mod prelude { + pub use crate::{plugin_fn, Handle, Value, Error, Result, FromValue, IntoValue}; +} + use alloc::{string::String, vec::Vec}; /* ---------- Wire imports --------------------------------------------- */ From 454c9aae395615962c5bfae31f24509cab7447e5 Mon Sep 17 00:00:00 2001 From: dylan-sutton-chavez Date: Sat, 9 May 2026 19:30:10 -0600 Subject: [PATCH 09/31] add module!
bootstrap macro to edge-pdk --- add-pdk-module-bootstrap-macro.txt | 58 ++++++++++++++++++++++++++++++ edge-pdk/Cargo.toml | 6 ++++ edge-pdk/src/lib.rs | 38 ++++++++++++++++++++ 3 files changed, 102 insertions(+) create mode 100644 add-pdk-module-bootstrap-macro.txt diff --git a/add-pdk-module-bootstrap-macro.txt b/add-pdk-module-bootstrap-macro.txt new file mode 100644 index 0000000..0d58039 --- /dev/null +++ b/add-pdk-module-bootstrap-macro.txt @@ -0,0 +1,58 @@ +Branch: add-pdk-module-bootstrap-macro +Tech debt: D55 — every plugin author re-declares the same allocator + panic_handler boilerplate. + +================================================================================ +Commit 1: add edge_pdk::module! bootstrap macro +================================================================================ +Files: + - edge-pdk/Cargo.toml (wasm32 lol_alloc dep) + - edge-pdk/src/lib.rs (re-export + macro_rules! module) + +What was wrong: + Each plugin's lib.rs duplicated four lines of boilerplate: + + extern crate alloc; + #[global_allocator] + static A: lol_alloc::LeakingPageAllocator = lol_alloc::LeakingPageAllocator; + #[panic_handler] + fn panic(_: &core::panic::PanicInfo) -> ! { core::arch::wasm32::unreachable() } + + Two real costs: + - lol_alloc had to be listed in every plugin's Cargo.toml. The + workspace already needed it for the host compiler; making each + plugin add it again is duplication that drifts (different version + pins between plugins is a real failure mode). + - The #[panic_handler] is wasm32-only. New plugin authors who try + to add unit tests find their host build fails because panic_handler + is `duplicate language item` or `cannot apply to host build`. + The cfg-gating that fixes this isn't obvious; the macro hides it. + +Fix: + - Added a wasm32-only `lol_alloc = "0.4"` dependency in + edge-pdk/Cargo.toml under `[target.'cfg(target_arch = "wasm32")'.dependencies]`, + so the host build (e.g. 
cargo test on the PDK itself) does not pull + it in. + - Re-exported the crate as `__lol_alloc` (under a hidden path) from + edge-pdk/src/lib.rs so the macro can name the symbol regardless of + whether the user added their own `lol_alloc` dep. + - Added a `macro_rules! module` (#[macro_export]) that emits the + #[global_allocator] static and the #[panic_handler] fn under + `#[cfg(target_arch = "wasm32")]`, so on host builds the macro + expands to nothing and `cargo test` works without further changes. + + Plugin authors can now write `edge_pdk::module!();` once at the top + of lib.rs and drop the four boilerplate lines. The pre-existing + manual pattern still works; the macro is strictly additive. + + /* ... */ doc comment in the lexer/parser style records the exact + attributes the macro emits and the still-required crate-root + attributes (#![no_std], #![no_main], extern crate alloc;) that + cannot be injected from inside an item position. + +Documentation: + reference/wasm-abi.md and reference/writing-modules.md show the + manual pattern. Both are still valid — the macro is opt-in. No doc + updates are strictly required (recommending the macro path is a + pure migration suggestion, not a correctness fix). + +Build & tests: cargo build -p edge-pdk -> ok; cargo test -> 6/6 passed. diff --git a/edge-pdk/Cargo.toml b/edge-pdk/Cargo.toml index 274c400..ecf8455 100644 --- a/edge-pdk/Cargo.toml +++ b/edge-pdk/Cargo.toml @@ -12,3 +12,9 @@ doctest = false [dependencies] edge-pdk-macros = { path = "macros", version = "0.1.0" } + +# Re-exported only so `edge_pdk::module!()` can refer to the symbol path. +# Plugin authors who skip the macro and declare their own #[global_allocator] +# do not pay for this — Rust drops unused crates from the cdylib. 
+[target.'cfg(target_arch = "wasm32")'.dependencies] +lol_alloc = "0.4" diff --git a/edge-pdk/src/lib.rs b/edge-pdk/src/lib.rs index 9c8a556..a0f5b22 100644 --- a/edge-pdk/src/lib.rs +++ b/edge-pdk/src/lib.rs @@ -48,6 +48,44 @@ pub mod prelude { pub use crate::{plugin_fn, Handle, Value, Error, Result, FromValue, IntoValue}; } +/* ---------- Plugin bootstrap ----------------------------------------- */ + +/* Re-exported under a hidden path so `module!` can name lol_alloc without + forcing the plugin author to add it to their own Cargo.toml. */ +#[cfg(target_arch = "wasm32")] +#[doc(hidden)] +pub use lol_alloc as __lol_alloc; + +/* Emits the wasm32-only boilerplate every Edge Python plugin needs: + - a #[global_allocator] backed by lol_alloc::LeakingPageAllocator + (single-threaded bump allocator that matches the host model), + - a #[panic_handler] that traps via wasm32::unreachable. + + The plugin author still writes #![no_std] / #![no_main] / extern crate + alloc; at the crate root — those are crate-level attributes the macro + cannot inject from inside an item position. + + Usage: + edge_pdk::module!(); + + On non-wasm targets (e.g. host-side unit tests for the plugin) the + macro expands to nothing so cargo test still works. */ +#[macro_export] +macro_rules! module { + () => { + #[cfg(target_arch = "wasm32")] + #[global_allocator] + static __EDGE_PDK_ALLOC: $crate::__lol_alloc::LeakingPageAllocator + = $crate::__lol_alloc::LeakingPageAllocator; + + #[cfg(target_arch = "wasm32")] + #[panic_handler] + fn __edge_pdk_panic(_: &core::panic::PanicInfo) -> ! 
{ + core::arch::wasm32::unreachable() + } + }; +} + use alloc::{string::String, vec::Vec}; /* ---------- Wire imports --------------------------------------------- */ From 9ec804e08946f5b7a75f1687b9fbae5c328f2ab3 Mon Sep 17 00:00:00 2001 From: dylan-sutton-chavez Date: Sat, 9 May 2026 20:39:18 -0600 Subject: [PATCH 10/31] export edge_abi_version from pdk --- documentation/reference/wasm-abi.md | 13 ++++-- edge-pdk/src/lib.rs | 12 ++++++ export-pdk-abi-version.txt | 63 +++++++++++++++++++++++++++++ 3 files changed, 85 insertions(+), 3 deletions(-) create mode 100644 export-pdk-abi-version.txt diff --git a/documentation/reference/wasm-abi.md b/documentation/reference/wasm-abi.md index 7020118..d68925a 100644 --- a/documentation/reference/wasm-abi.md +++ b/documentation/reference/wasm-abi.md @@ -24,16 +24,23 @@ extern "C" fn (argv: *const u32, argc: u32, out: *mut u32) -> i32; Handles in `argv` are owned by the host and live for the duration of the call. Handles the guest creates via `edge_encode` or `edge_op` are owned by the guest until released — the guest must call `edge_release` on each before returning, **except** for the one written into `*out`. -## Required guest export +## Required guest exports In addition to the user functions, every guest module MUST export: ```rust #[unsafe(no_mangle)] pub extern "C" fn __edge_alloc(size: u32) -> *mut u8; + +#[unsafe(no_mangle)] +pub extern "C" fn __edge_abi_version() -> u32; ``` -The host calls this to stage `argv` arrays in the guest's linear memory before invoking each export. The reference `edge-pdk` crate emits it automatically. +`__edge_alloc` lets the host stage `argv` arrays in the guest's linear memory before invoking each export. + +`__edge_abi_version` returns the wire-format version this module targets (currently `1`). The host MUST read this symbol once at instantiation and refuse modules whose version it does not understand. 
Without the handshake, a host that has evolved beyond v1 would load a v1 module and decode garbage silently. + +The reference `edge-pdk` crate emits both symbols automatically (`EDGE_ABI_VERSION` is a `pub const` in the same crate). ## Host imports (6 functions) @@ -346,7 +353,7 @@ The Edge Python project distributes only this specification. The reference Rust - `#[plugin_fn]` proc macro that turns a typed Rust function into a wire-conformant export. - `FromValue` / `IntoValue` traits with primitive impls (`i64`, `f64`, `bool`, `String`, `&str`, `Option`, `Handle`). - `Handle` / `Value` / `Error` types wrapping handles with `Drop`-driven release. -- The required `__edge_alloc` export emitted automatically. +- The required `__edge_alloc` and `__edge_abi_version` exports emitted automatically. A typical author-side function with the macro: diff --git a/edge-pdk/src/lib.rs b/edge-pdk/src/lib.rs index a0f5b22..7cde6bf 100644 --- a/edge-pdk/src/lib.rs +++ b/edge-pdk/src/lib.rs @@ -117,6 +117,18 @@ unsafe extern "C" { pub fn edge_throw(kind: u32, msg_ptr: *const u8, msg_len: u32); } +/* ---------- ABI version handshake ------------------------------------ */ + +/* Wire-format version this PDK targets. Bump on any breaking change to + op codes, value tags, codec layout, or error kinds. The host loader + reads `__edge_abi_version` and refuses to instantiate a plugin whose + version it does not understand — without this, an evolved host would + load an old plugin and decode garbage silently. 
*/ +pub const EDGE_ABI_VERSION: u32 = 1; + +#[unsafe(no_mangle)] +pub extern "C" fn __edge_abi_version() -> u32 { EDGE_ABI_VERSION } + /* ---------- Op codes & tags (must match bridge.rs spec) -------------- */ #[allow(non_camel_case_types)] diff --git a/export-pdk-abi-version.txt b/export-pdk-abi-version.txt new file mode 100644 index 0000000..f7f14a3 --- /dev/null +++ b/export-pdk-abi-version.txt @@ -0,0 +1,63 @@ +Branch: export-pdk-abi-version +Tech debt: D22 — PDK had no version handshake; an evolved host could load a v1 plugin and decode garbage silently. + +================================================================================ +Commit 1: export __edge_abi_version from edge-pdk +================================================================================ +Files: + - edge-pdk/src/lib.rs + - documentation/reference/wasm-abi.md + +What was wrong: + The PDK's module-doc said it targeted "v1 wasm-abi" (line 4 of + src/lib.rs), but no symbol carried that version into the linked + artifact. Concretely: + + - The `op` and `tag` modules listed const codes a plugin had to match + against host expectations. + - The host-side bridge.rs defined the same codes (D21 — already + flagged as needing centralisation). + - Nothing told a host loader which version of the wire format the + .wasm in front of it was built against. + + Result: if the host evolved (e.g. added op = 8 with a new layout), + it would load an old plugin that still implemented op = 0..7. Most + call paths would seem to work; the new op would silently produce a + different effect (or crash with no actionable diagnostic) the first + time it landed. + +Fix: + Added `pub const EDGE_ABI_VERSION: u32 = 1;` to edge-pdk/src/lib.rs + and a no_mangle wasm export: + + #[unsafe(no_mangle)] + pub extern "C" fn __edge_abi_version() -> u32 { EDGE_ABI_VERSION } + + Every plugin built against this PDK now carries a version stamp the + host can read at instantiation. 
This is purely additive on the + guest side; existing hosts that don't read the symbol behave + unchanged. + + /* ... */ doc comment in the lexer/parser style records why bumps + are required (any breaking change to op/tag/codec/error kinds). + +Documentation: + reference/wasm-abi.md required two changes (strict necessity — the + doc declares the public contract): + + 1. The "Required guest export" section (singular) became "Required + guest exports" (plural) with both `__edge_alloc` and + `__edge_abi_version` listed in the rust signature block, plus a + paragraph explaining the host's MUST-check obligation. + 2. The PDK summary near the bottom now says "the required + __edge_alloc and __edge_abi_version exports emitted automatically". + + No JS-side host loader code is in this repo, so the loader-side + validation is documented as a contract for downstream hosts to + honour. The companion edge-abi crate (D21) and host-side reading + of this symbol from compiler.wasm remain separate work. + +Build & tests: + cargo build -p edge-pdk -> ok. + cargo build -p slugify-mod --target wasm32-unknown-unknown --release -> ok. + cargo test -> 6/6 passed. From 160d56b1943da09eb67c686672c89758f535099b Mon Sep 17 00:00:00 2001 From: dylan-sutton-chavez Date: Sat, 9 May 2026 16:43:18 -0600 Subject: [PATCH 11/31] add wasm_free symmetric to wasm_alloc --- add-wasm-free-export.txt | 40 ++++++++++++++++++++++++++++++++++++ compiler/src/main/exports.rs | 12 +++++++++++ 2 files changed, 52 insertions(+) create mode 100644 add-wasm-free-export.txt diff --git a/add-wasm-free-export.txt b/add-wasm-free-export.txt new file mode 100644 index 0000000..353cd9e --- /dev/null +++ b/add-wasm-free-export.txt @@ -0,0 +1,40 @@ +Branch: add-wasm-free-export +Tech debt: D17 (partial) — wasm_alloc has no symmetric wasm_free, every host alloc leaks. 
+ +================================================================================ +Commit 1: add wasm_free counterpart for wasm_alloc +================================================================================ +File: compiler/src/main/exports.rs + +What was wrong: + wasm_alloc returned a pointer obtained from Box::into_raw(...) of a + boxed slice, but no export let the host hand the pointer back to be + reclaimed. Every host-side staging buffer therefore leaked for the + lifetime of the WASM instance. The WASM runtime had grown to expect + this — the comment in mod.rs about the bump allocator masks the + fact that the underlying allocator itself can free, the host just + had no surface to ask for it. + +Fix: + Added the symmetric export: + + pub unsafe extern "C" fn wasm_free(ptr: *mut u8, size: u32); + + It reconstructs the boxed slice from (ptr, size) and drops it — the + exact inverse of wasm_alloc's `Box::into_raw(boxed_slice)`. Treats + null ptr and size == 0 as no-ops so the host can call it + unconditionally after every alloc. A /* ... */ comment in the + lexer/parser style records the precondition (size must match the + original alloc). + + This does NOT address the broader D2 (deprecate SRC/OUT/INP in + favour of handle-based ABI) — that's structural. It just closes + the alloc/free asymmetry so a host that wants to release buffers + can, today, without changing the rest of the wire. + +Documentation: + No .md docs reference wasm_alloc or wasm_free at the wire level (the + WASM ABI doc covers host imports / exports for handles, not these + staging buffers). No doc updates required. + +Build & tests: cargo build -p edge-python -> ok; cargo test -> 6/6 passed. 
diff --git a/compiler/src/main/exports.rs b/compiler/src/main/exports.rs index f84a236..8a3b4c3 100644 --- a/compiler/src/main/exports.rs +++ b/compiler/src/main/exports.rs @@ -23,6 +23,18 @@ pub unsafe extern "C" fn wasm_alloc(size: u32) -> *mut u8 { Box::into_raw(v.into_boxed_slice()) as *mut u8 } +/* Releases a buffer previously returned by `wasm_alloc`. The host MUST pass + the exact same `size` it requested; mismatched lengths reconstruct the + wrong Box layout. Calling with a null pointer or `size == 0` is a no-op. */ +#[unsafe(no_mangle)] +pub unsafe extern "C" fn wasm_free(ptr: *mut u8, size: u32) { + if ptr.is_null() || size == 0 { return; } + unsafe { + let slice = core::slice::from_raw_parts_mut(ptr, size as usize); + let _ = Box::from_raw(slice as *mut [u8]); + } +} + #[unsafe(no_mangle)] pub unsafe extern "C" fn register_code_module( spec_ptr: *const u8, spec_len: u32, From dcc7a9ce279f25b82434c8f5aa888ab411bf43f2 Mon Sep 17 00:00:00 2001 From: dylan-sutton-chavez Date: Sat, 9 May 2026 14:19:16 -0600 Subject: [PATCH 12/31] drop unused package re-export aliases --- compiler/src/modules/packages/mod.rs | 5 ----- remove-cosmetic-package-aliases.txt | 28 ++++++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 5 deletions(-) create mode 100644 remove-cosmetic-package-aliases.txt diff --git a/compiler/src/modules/packages/mod.rs b/compiler/src/modules/packages/mod.rs index a66ef45..5b8f8de 100644 --- a/compiler/src/modules/packages/mod.rs +++ b/compiler/src/modules/packages/mod.rs @@ -88,11 +88,6 @@ impl Default for Box { fn default() -> Self { Box::new(NoopResolver) } } -/* Re-exports core types; hosts get trait, enums, binding, and default resolver via glob import. */ -pub use NativeBinding as Binding; -pub use Resolved as ResolvedModule; -pub use NoopResolver as Default_; - /* Converts public NativeBinding into internal ExternFn; two structs separate host API from VM storage. 
*/ pub(crate) fn binding_to_extern(b: &NativeBinding) -> crate::modules::vm::types::ExternFn { crate::modules::vm::types::ExternFn { diff --git a/remove-cosmetic-package-aliases.txt b/remove-cosmetic-package-aliases.txt new file mode 100644 index 0000000..e4fa179 --- /dev/null +++ b/remove-cosmetic-package-aliases.txt @@ -0,0 +1,28 @@ +Branch: remove-cosmetic-package-aliases +Tech debt: D45 — three pub-use aliases unused anywhere in the workspace. + +================================================================================ +Commit 1: drop unused package re-export aliases +================================================================================ +File: compiler/src/modules/packages/mod.rs + +What was wrong: + Lines 91-94 re-exported three names with no callers: + pub use NativeBinding as Binding; + pub use Resolved as ResolvedModule; + pub use NoopResolver as Default_; + + A workspace-wide grep for `\bBinding\b|\bResolvedModule\b|\bDefault_\b` + returns only the three declarations themselves. The .md docs and rust + sources all use the canonical names `NativeBinding`, `Resolved`, and + `NoopResolver`. + + `Default_` with the trailing underscore is also a Rust-convention + smell — readers expect `Default` to be the trait, not a type alias. + +Fix: + Removed the three aliases and their wrapping comment. Canonical names + remain the single source of truth. + +Tests: cargo test -> 6/6 passed. +Docs: none reference the aliases; no changes required. 
From 3e0eea6c67a464485aeae4dc39b09221b16163ea Mon Sep 17 00:00:00 2001 From: dylan-sutton-chavez Date: Sat, 9 May 2026 14:15:47 -0600 Subject: [PATCH 13/31] fix plugin_fn macro error message --- edge-pdk/macros/src/lib.rs | 2 +- fix-plugin-fn-macro-error-message.txt | 27 +++++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 fix-plugin-fn-macro-error-message.txt diff --git a/edge-pdk/macros/src/lib.rs b/edge-pdk/macros/src/lib.rs index 240e1fa..cf201c9 100644 --- a/edge-pdk/macros/src/lib.rs +++ b/edge-pdk/macros/src/lib.rs @@ -51,7 +51,7 @@ pub fn plugin_fn(_attr: TokenStream, item: TokenStream) -> TokenStream { } FnArg::Receiver(_) => { return TokenStream::from(quote! { - compile_error!("#[edge] does not support methods (`self` parameter)"); + compile_error!("#[plugin_fn] does not support methods (`self` parameter)"); }); } } diff --git a/fix-plugin-fn-macro-error-message.txt b/fix-plugin-fn-macro-error-message.txt new file mode 100644 index 0000000..f3253eb --- /dev/null +++ b/fix-plugin-fn-macro-error-message.txt @@ -0,0 +1,27 @@ +Branch: fix-plugin-fn-macro-error-message +Tech debt: D6 — compile_error message cites a non-existent attribute name. + +================================================================================ +Commit 1: fix plugin_fn macro error message attribute name +================================================================================ +File: edge-pdk/macros/src/lib.rs + +What was wrong: + Line 54: compile_error!("#[edge] does not support methods (`self` parameter)"); + + The exported attribute is `#[plugin_fn]`, not `#[edge]`. The literal looks + like a copy-paste from a previous renaming pass. A plugin author who tried + to write a method-style fn would see an error pointing at `#[edge]` — a + symbol that does not exist in this crate or in the docs, so they cannot + grep their code for it. + +Fix: + Changed the literal to "#[plugin_fn] does not support methods (`self` + parameter)". 
+ + Cargo.toml description in the same crate also misnames the attribute, but + that is part of the larger crate rename (D5: `macros` -> `edge-pdk-macros`) + and is left for that branch. All .md documentation already uses + `#[plugin_fn]` consistently; no doc changes were needed. + +Build: cargo build -p macros -> ok. From 2a4691b54bb1d050288304450683b2624021a145 Mon Sep 17 00:00:00 2001 From: dylan-sutton-chavez Date: Sat, 9 May 2026 14:22:43 -0600 Subject: [PATCH 14/31] correct host_edge_encode doc comment --- compiler/src/main/abi_bridge.rs | 2 +- fix-host-edge-encode-doc.txt | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) create mode 100644 fix-host-edge-encode-doc.txt diff --git a/compiler/src/main/abi_bridge.rs b/compiler/src/main/abi_bridge.rs index ec6e01d..3c34499 100644 --- a/compiler/src/main/abi_bridge.rs +++ b/compiler/src/main/abi_bridge.rs @@ -212,7 +212,7 @@ fn dispatch_iter_next(recv_h: u32) -> Result { }) } -// Bootstrap decoder: writes tag to `*out_tag`, bytes to `dst[..dst_max]`. +// Bootstrap encoder: classifies (tag, bytes) into a Val handle; returns 0 on Invalid. #[unsafe(no_mangle)] pub unsafe extern "C" fn host_edge_encode(tag: u32, ptr: *const u8, len: u32) -> u32 { let bytes = if len == 0 || ptr.is_null() { diff --git a/fix-host-edge-encode-doc.txt b/fix-host-edge-encode-doc.txt new file mode 100644 index 0000000..4c28cef --- /dev/null +++ b/fix-host-edge-encode-doc.txt @@ -0,0 +1,32 @@ +Branch: fix-host-edge-encode-doc +Tech debt: D59 — duplicated copy-paste doc-comment on host_edge_encode. + +================================================================================ +Commit 1: correct host_edge_encode doc-comment +================================================================================ +File: compiler/src/main/abi_bridge.rs + +What was wrong: + Lines 214 and 236 both carried the same doc-comment: + "// Bootstrap decoder: writes tag to `*out_tag`, bytes to `dst[..dst_max]`." 
+ + The comment is correct above host_edge_decode (line 236) but a copy-paste + on host_edge_encode (line 214). encode does not write to *out_tag and + does not own a dst buffer — it takes (tag, ptr, len), classifies them + into a Val, and returns the resulting handle (0 on Invalid). The + implementation made this clear; only the comment misled. + +Fix: + Replaced the comment on encode with: + "// Bootstrap encoder: classifies (tag, bytes) into a Val handle; + returns 0 on Invalid." + + This matches the actual signature + fn host_edge_encode(tag: u32, ptr: *const u8, len: u32) -> u32 + and the body's behaviour (Direct/AllocStr -> put_val, Invalid -> 0). + +Documentation: + reference/wasm-abi.md mentions host_edge_encode by name only (line 328); + no description there to drift. No doc changes required. + +Build: cargo build -p edge-python -> ok. From 38e1dddc1f57bf9bd4c87e6cd7d604a967051c6a Mon Sep 17 00:00:00 2001 From: dylan-sutton-chavez Date: Sat, 9 May 2026 14:14:05 -0600 Subject: [PATCH 15/31] fix raw string prefix detection --- compiler/src/modules/parser/types.rs | 11 ++++++++-- fix-raw-string-prefix-detection.txt | 32 ++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 2 deletions(-) create mode 100644 fix-raw-string-prefix-detection.txt diff --git a/compiler/src/modules/parser/types.rs b/compiler/src/modules/parser/types.rs index 977538c..41b08ee 100644 --- a/compiler/src/modules/parser/types.rs +++ b/compiler/src/modules/parser/types.rs @@ -355,9 +355,16 @@ impl Diagnostic { } } +/* Scan only the prefix chars before the opening quote; the body itself may legally contain 'r'/'R'. */ +fn has_raw_prefix(s: &str) -> bool { + s.bytes() + .take_while(|b| !matches!(b, b'"' | b'\'')) + .any(|b| matches!(b, b'r' | b'R')) +} + // Strip prefix + quotes and unescape (skipped for raw strings). 
pub(super) fn parse_string(s: &str) -> String { - let is_raw = s.contains('r') || s.contains('R'); + let is_raw = has_raw_prefix(s); let s = s.trim_start_matches(|c: char| "bBrRuU".contains(c)); let inner = if s.starts_with("\"\"\"") || s.starts_with("'''") { &s[3..s.len() - 3] @@ -370,7 +377,7 @@ pub(super) fn parse_string(s: &str) -> String { /* Parses b"..." to raw bytes: non-ASCII pass through; \xHH=single byte; \u/\U/\N rejected. */ pub(super) fn parse_bytes_literal(s: &str) -> alloc::vec::Vec { let bytes = s.as_bytes(); - let is_raw = s.contains('r') || s.contains('R'); + let is_raw = has_raw_prefix(s); // Skip b/B/r/R prefix chars. let mut i = 0; while i < bytes.len() && matches!(bytes[i], b'b' | b'B' | b'r' | b'R') { diff --git a/fix-raw-string-prefix-detection.txt b/fix-raw-string-prefix-detection.txt new file mode 100644 index 0000000..531b8e5 --- /dev/null +++ b/fix-raw-string-prefix-detection.txt @@ -0,0 +1,32 @@ +Branch: fix-raw-string-prefix-detection +Tech debt: D1 — is_raw scans entire lexeme instead of prefix only. + +================================================================================ +Commit 1: fix raw string prefix detection +================================================================================ +File: compiler/src/modules/parser/types.rs + +What was wrong: + parse_string (line 360): `let is_raw = s.contains('r') || s.contains('R');` + parse_bytes_literal (line 373): same pattern. + + s is the full lexeme (prefix + quotes + body). `contains('r')` therefore + scans the body too. Any non-raw u-string or b-string whose body contains + the letter 'r' or 'R' was silently treated as raw, so escapes like \n, \t, + \xHH were preserved verbatim. + + Concrete example: u"hello\n" -> body contains no 'r', escape works. + But u"forward\n" -> body contains 'r' inside "forward", is_raw=true, + the literal kept the backslash-n unescaped.
+ + Documentation in implementation/lexical.md already states that the prefix + is recognised before the opening quote, so docs were already correct; only + the implementation diverged. + +Fix: + Introduced has_raw_prefix(s) which reads bytes only up to the first quote + byte (`"` or `'`) and checks for r/R there. parse_string and + parse_bytes_literal now call has_raw_prefix, so detection matches the + language spec and the documentation. + +Tests: cargo test -> 6/6 passed. From 198b227bb2f019bcbe22f5f81c9ade28249efb75 Mon Sep 17 00:00:00 2001 From: dylan-sutton-chavez Date: Sat, 9 May 2026 14:17:16 -0600 Subject: [PATCH 16/31] add bell backspace form-feed vtab escapes --- add-bell-backspace-form-vtab-escapes.txt | 30 ++++++++++++++++++++++++ compiler/src/modules/parser/types.rs | 8 +++++++ documentation/implementation/lexical.md | 2 +- 3 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 add-bell-backspace-form-vtab-escapes.txt diff --git a/add-bell-backspace-form-vtab-escapes.txt b/add-bell-backspace-form-vtab-escapes.txt new file mode 100644 index 0000000..b00e84d --- /dev/null +++ b/add-bell-backspace-form-vtab-escapes.txt @@ -0,0 +1,30 @@ +Branch: add-bell-backspace-form-vtab-escapes +Tech debt: D70 — missing \a \b \f \v escapes in unescape and parse_bytes_literal. + +================================================================================ +Commit 1: add bell backspace form-feed vertical-tab escapes +================================================================================ +Files: + - compiler/src/modules/parser/types.rs + - documentation/implementation/lexical.md + +What was wrong: + unescape (parser/types.rs:422+) had cases for \n \t \r \\ \' \" \x \u \U + and 1-3 digit octal, but no cases for \a \b \f \v. These are valid Python + escape sequences for ASCII control characters BEL, BS, FF, VT. 
Without a + matching arm they fell through to the "Some(c) => out.push('\\'); out.push(c);" + branch, so "\\a" stayed as the two characters '\' and 'a'. + + parse_bytes_literal had the same gap. + +Fix: + Added \a -> 0x07, \b -> 0x08, \f -> 0x0C, \v -> 0x0B in both unescape (str) + and parse_bytes_literal (bytes), keeping the existing match-arm style. + +Documentation: + documentation/implementation/lexical.md line 78 explicitly enumerates the + recognised escapes; updating it was strictly necessary since omitting the + new escapes would leave the spec incomplete. Added \a \b \f \v to the + recognised-escapes list in the same compact inline format. + +Tests: cargo test -> 6/6 passed. diff --git a/compiler/src/modules/parser/types.rs b/compiler/src/modules/parser/types.rs index 41b08ee..6d56d4c 100644 --- a/compiler/src/modules/parser/types.rs +++ b/compiler/src/modules/parser/types.rs @@ -402,6 +402,10 @@ pub(super) fn parse_bytes_literal(s: &str) -> alloc::vec::Vec { b'n' => { out.push(b'\n'); j += 2; } b't' => { out.push(b'\t'); j += 2; } b'r' => { out.push(b'\r'); j += 2; } + b'a' => { out.push(0x07); j += 2; } + b'b' => { out.push(0x08); j += 2; } + b'f' => { out.push(0x0C); j += 2; } + b'v' => { out.push(0x0B); j += 2; } b'\\' => { out.push(b'\\'); j += 2; } b'\'' => { out.push(b'\''); j += 2; } b'"' => { out.push(b'"'); j += 2; } @@ -441,6 +445,10 @@ fn unescape(s: &str) -> String { Some('n') => out.push('\n'), Some('t') => out.push('\t'), Some('r') => out.push('\r'), + Some('a') => out.push('\u{07}'), + Some('b') => out.push('\u{08}'), + Some('f') => out.push('\u{0C}'), + Some('v') => out.push('\u{0B}'), Some('\\') => out.push('\\'), Some('\'') => out.push('\''), Some('"') => out.push('"'), diff --git a/documentation/implementation/lexical.md b/documentation/implementation/lexical.md index ea2bf51..00339cd 100644 --- a/documentation/implementation/lexical.md +++ b/documentation/implementation/lexical.md @@ -75,7 +75,7 @@ fr'raw fstring' # raw f-string 
"""triple""" # triple-quoted, single or double ``` -A leading prefix is recognised before the opening quote by the identifier scanner and verified against `is_string_prefix`, `is_fstring_prefix`, or `is_bytes_prefix`. Triple-quoted strings span newlines and bump `line` for each `\n` inside. Backslash escapes are consumed at lex time but **decoded** by the parser, so escape semantics live alongside the literal type. Recognised escapes: `\n \t \r \\ \' \" \xHH \uHHHH \UHHHHHHHH` plus 1- to 3-digit octal escapes (`\012` -> `\n`, `\101` -> `A`). `\N{NAME}` Unicode-name escapes are not implemented and pass through as literal text — embedding the ~200 KB Unicode-name database is rejected as too costly for the WASM artifact. +A leading prefix is recognised before the opening quote by the identifier scanner and verified against `is_string_prefix`, `is_fstring_prefix`, or `is_bytes_prefix`. Triple-quoted strings span newlines and bump `line` for each `\n` inside. Backslash escapes are consumed at lex time but **decoded** by the parser, so escape semantics live alongside the literal type. Recognised escapes: `\n \t \r \a \b \f \v \\ \' \" \xHH \uHHHH \UHHHHHHHH` plus 1- to 3-digit octal escapes (`\012` -> `\n`, `\101` -> `A`). `\N{NAME}` Unicode-name escapes are not implemented and pass through as literal text — embedding the ~200 KB Unicode-name database is rejected as too costly for the WASM artifact. 
Lex-time errors anchor on the opening quote so the user's `^` marker points at the offender, not at end-of-line: From 50cabfd4cd297ba886755e1c7c094f4dc8573fd0 Mon Sep 17 00:00:00 2001 From: dylan-sutton-chavez Date: Sat, 9 May 2026 15:27:47 -0600 Subject: [PATCH 17/31] generalize integer literal overflow message --- compiler/src/modules/parser/expr.rs | 2 +- generalize-int-overflow-message.txt | 44 +++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 1 deletion(-) create mode 100644 generalize-int-overflow-message.txt diff --git a/compiler/src/modules/parser/expr.rs b/compiler/src/modules/parser/expr.rs index 888ce53..5095d2b 100644 --- a/compiler/src/modules/parser/expr.rs +++ b/compiler/src/modules/parser/expr.rs @@ -280,7 +280,7 @@ impl<'src, I: Iterator> Parser<'src, I> { }; match parsed { Some(v) => self.emit_const(Value::Int(v)), - None => self.error("integer literal too large for 47-bit Val (max \u{00b1}140737488355327)"), + None => self.error("integer literal too large to represent"), } } diff --git a/generalize-int-overflow-message.txt b/generalize-int-overflow-message.txt new file mode 100644 index 0000000..f6cf258 --- /dev/null +++ b/generalize-int-overflow-message.txt @@ -0,0 +1,44 @@ +Branch: generalize-int-overflow-message +Tech debt: D63 — parse-time int overflow message exposes Val internals. + +================================================================================ +Commit 1: generalize integer literal overflow message +================================================================================ +File: compiler/src/modules/parser/expr.rs + +What was wrong: + parse_number (line 283) emitted: + + "integer literal too large for 47-bit Val (max \u{00b1}140737488355327)" + + Two issues: + + 1. The error fires on `digits.parse::<i64>().ok()` failure. i64::MAX + is ~9.2e18, far above the 47-bit cap (~1.4e14).
So a literal in + the 1.4e14..9.2e18 window parses fine here and only fails LATER + at NaN-box materialisation in cache.rs / optimizer.rs as + OverflowError. The "47-bit" attribution at this site is therefore + factually wrong — the limit being violated at parse time is the + i64 ceiling, not the 47-bit Val ceiling. + 2. Even when the upper bound were correct, "47-bit Val" leaks the + internal NaN-boxing detail to a parse-time user-facing error. + +Fix: + Replaced the message with the simpler, accurate phrasing: + + "integer literal too large to represent" + + The 47-bit fact remains documented in: + - documentation/implementation/design.md (architecture) + - compiler/README.md (memory model + Val table) + - documentation/getting-started/what-it-is.md (numbers) + These are correct: the 47-bit cap is enforced at materialisation as + OverflowError, exactly as the docs say. Only the parse-time message + was misattributing the failure. + +Documentation: + No doc changes required. The 47-bit invariant docs describe the VM + side, which is unchanged. The parse-time message is not surfaced in + any doc. + +Tests: cargo test -> 6/6 passed. From dc19cb8453e3a8f429753beddd7838596a8ecc5d Mon Sep 17 00:00:00 2001 From: dylan-sutton-chavez Date: Sat, 9 May 2026 16:54:50 -0600 Subject: [PATCH 18/31] split fast-path outcomes for ic stability --- compiler/src/modules/vm/dispatch.rs | 44 ++++++++++++------ distinguish-overflow-from-type-miss.txt | 62 +++++++++++++++++++++++++ 2 files changed, 91 insertions(+), 15 deletions(-) create mode 100644 distinguish-overflow-from-type-miss.txt diff --git a/compiler/src/modules/vm/dispatch.rs b/compiler/src/modules/vm/dispatch.rs index 4233317..3f0a81f 100644 --- a/compiler/src/modules/vm/dispatch.rs +++ b/compiler/src/modules/vm/dispatch.rs @@ -7,15 +7,24 @@ use super::{ExceptionFrame, VM, handlers}; use super::types::*; use super::cache::{OpcodeCache, FastOp}; +/* Three-way result of a fast-path attempt; see exec_fast for semantics. 
*/ +enum FastOutcome { Done, TypeMiss, Overflow } + impl<'a> VM<'a> { - /* Inline-cache fast path. Peeks the stack and only pops on success; - returns Ok(false) with the stack untouched on a type-guard miss - so the caller can fall back to the generic handler and deopt the IC. */ + /* Inline-cache fast path. Peeks the stack and only pops on success. + Three outcomes: + Done — the op ran inline; stack consumed and result pushed. + TypeMiss — operands didn't match the speculation; deopt the IC. + Overflow — types matched but the result can't be represented (int + overflow, division by zero); the slow handler will + raise the proper Python exception. The IC stays warm + because the speculation was correct: the op IS hot + on these types, only this *one* input pair fell out. */ #[inline] - fn exec_fast(&mut self, fast: FastOp) -> Result { + fn exec_fast(&mut self, fast: FastOp) -> Result { let len = self.stack.len(); - if len < 2 { return Ok(false); } + if len < 2 { return Ok(FastOutcome::TypeMiss); } let a = self.stack[len - 2]; let b = self.stack[len - 1]; @@ -25,28 +34,28 @@ impl<'a> VM<'a> { FastOp::AddInt if a.is_int() && b.is_int() => { match a.as_int().checked_add(b.as_int()).and_then(Val::int_checked) { Some(v) => v, - None => return Ok(false), + None => return Ok(FastOutcome::Overflow), } } FastOp::SubInt if a.is_int() && b.is_int() => { match a.as_int().checked_sub(b.as_int()).and_then(Val::int_checked) { Some(v) => v, - None => return Ok(false), + None => return Ok(FastOutcome::Overflow), } } FastOp::MulInt if a.is_int() && b.is_int() => { let r = a.as_int() as i128 * b.as_int() as i128; - if r >= Val::INT_MIN as i128 && r <= Val::INT_MAX as i128 { Val::int(r as i64) } else { return Ok(false); } + if r >= Val::INT_MIN as i128 && r <= Val::INT_MAX as i128 { Val::int(r as i64) } else { return Ok(FastOutcome::Overflow); } } FastOp::MulFloat if a.is_float() && b.is_float() => Val::float(a.as_float() * b.as_float()), FastOp::ModInt if a.is_int() && b.is_int() => 
{ let bv = b.as_int(); - if bv == 0 { return Ok(false); } + if bv == 0 { return Ok(FastOutcome::Overflow); } Val::int(((a.as_int() % bv) + bv) % bv) } FastOp::FloorDivInt if a.is_int() && b.is_int() => { let bv = b.as_int(); - if bv == 0 { return Ok(false); } + if bv == 0 { return Ok(FastOutcome::Overflow); } Val::int(a.as_int().div_euclid(bv)) } @@ -61,7 +70,7 @@ impl<'a> VM<'a> { FastOp::AddStr | FastOp::EqStr if a.is_heap() && b.is_heap() => { let (sa, sb) = match (self.heap.get(a), self.heap.get(b)) { (HeapObj::Str(x), HeapObj::Str(y)) => (x.clone(), y.clone()), - _ => return Ok(false), + _ => return Ok(FastOutcome::TypeMiss), }; match fast { FastOp::AddStr => { @@ -73,12 +82,12 @@ impl<'a> VM<'a> { } } - _ => return Ok(false), + _ => return Ok(FastOutcome::TypeMiss), }; self.stack.truncate(len - 2); self.push(result); - Ok(true) + Ok(FastOutcome::Done) } /* Main dispatch loop. Walks the fused instruction stream (LoadAttr+Call @@ -368,8 +377,13 @@ impl<'a> VM<'a> { | OpCode::Eq | OpCode::Lt | OpCode::NotEq | OpCode::Gt | OpCode::LtEq | OpCode::GtEq => { if let Some(fast) = cache.get_fast(rip) { - if self.exec_fast(fast)? { return Ok(None); } - cache.invalidate(rip); + match self.exec_fast(fast)? { + FastOutcome::Done => return Ok(None), + /* Speculation was right (matching types); the slow handler + will raise the proper Python exception. Keep the IC. */ + FastOutcome::Overflow => {} + FastOutcome::TypeMiss => cache.invalidate(rip), + } } if matches!(ins.opcode, OpCode::Eq | OpCode::Lt | OpCode::NotEq | OpCode::Gt | OpCode::LtEq | OpCode::GtEq) diff --git a/distinguish-overflow-from-type-miss.txt b/distinguish-overflow-from-type-miss.txt new file mode 100644 index 0000000..354a294 --- /dev/null +++ b/distinguish-overflow-from-type-miss.txt @@ -0,0 +1,62 @@ +Branch: distinguish-overflow-from-type-miss +Tech debt: D29 — IC fast path treated arithmetic overflow as a type-guard miss and threw away the speculation. 
+ +================================================================================ +Commit 1: split fast-path outcomes into Done/TypeMiss/Overflow +================================================================================ +File: compiler/src/modules/vm/dispatch.rs + +What was wrong: + exec_fast returned Ok(true) on success and Ok(false) on every other + fall-back path: type guard miss, integer overflow (AddInt/SubInt/ + MulInt), and division by zero (ModInt/FloorDivInt). The dispatcher + treated Ok(false) uniformly: + + if self.exec_fast(fast)? { return Ok(None); } + cache.invalidate(rip); // <-- always + + This means: the very first time a hot AddInt loop sees an overflow, + the IC slot is wiped. The slow handler raises the right + OverflowError, but for the next QUICK_THRESH = 4 iterations of the + loop the dispatch goes through the slow path until the IC re-warms. + In a numeric loop where ~99.99% of iterations are inline-int and one + is overflow, every overflow event costs four extra slow-path + dispatches plus the recompute of the type key. The cache thrashed + for the wrong reason — types had not changed, only one input pair + exceeded the 47-bit Val range. + +Fix: + Introduced a 3-way enum in the dispatch module: + enum FastOutcome { Done, TypeMiss, Overflow } + + exec_fast now returns Result: + Done — handled inline, no further work. + TypeMiss — operands didn't match the speculation (heap-not-Str, + not-int, not-float, fewer than 2 elements on stack). + Overflow — types matched but the result is unrepresentable + (i64 overflow, mod/floordiv with b==0). The slow + handler will raise OverflowError or ZeroDivisionError; + the IC stays warm because the speculation was correct. + + The dispatcher's call site explicitly destructures: + + match self.exec_fast(fast)? 
{ + FastOutcome::Done => return Ok(None), + FastOutcome::Overflow => {} // fall through, keep IC + FastOutcome::TypeMiss => cache.invalidate(rip), + } + + Net effect: a hot int-arith loop that hits an occasional overflow + no longer thrashes the IC; the slow handler raises the same exception + it always did, then the next iteration is back on the fast path. + + A /* ... */ doc comment in the lexer/parser style records the new + semantics on exec_fast and on the dispatcher. + +Documentation: + README.md, design.md, what-it-is.md describe the IC at the level of + "type-guard miss invalidates and deopts". That statement is still + exactly true — only TypeMiss invalidates. The new Overflow outcome + is a refinement the docs don't speak to. No doc updates required. + +Build & tests: cargo build -p edge-python -> ok; cargo test -> 6/6 passed. From 8f4a607003496daa99f6d611f9554ae633522351 Mon Sep 17 00:00:00 2001 From: dylan-sutton-chavez Date: Sat, 9 May 2026 16:47:54 -0600 Subject: [PATCH 19/31] skip memoization for mutable args --- compiler/src/modules/vm/cache.rs | 14 ++++++++ documentation/implementation/design.md | 2 +- gate-templates-on-immutable-args.txt | 44 ++++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 1 deletion(-) create mode 100644 gate-templates-on-immutable-args.txt diff --git a/compiler/src/modules/vm/cache.rs b/compiler/src/modules/vm/cache.rs index 0709f84..6898570 100644 --- a/compiler/src/modules/vm/cache.rs +++ b/compiler/src/modules/vm/cache.rs @@ -153,6 +153,19 @@ fn hash_args(args: &[Val]) -> u64 { h } +/* True when every arg is value-stable across calls. Mutable containers + (List, Dict, Set, Instance) hash by raw heap-index bits, so a caller + that mutates the same container between two calls produces a stale + cache hit. Only memoize when every arg is byte-by-byte immutable. 
*/ +fn args_memoizable(args: &[Val], heap: &super::types::HeapPool) -> bool { + use super::types::HeapObj; + args.iter().all(|v| { + if !v.is_heap() { return true; } + !matches!(heap.get(*v), HeapObj::List(_) | HeapObj::Dict(_) + | HeapObj::Set(_) | HeapObj::Instance(..)) + }) +} + // Indexed by `fi` (function id, dense from 0..N). Vec gives O(1) lookup // without a HashMap monomorphization. pub struct Templates { slots: Vec> } @@ -168,6 +181,7 @@ impl Templates { } pub fn record(&mut self, fi: usize, args: &[Val], result: Val, heap: &super::types::HeapPool) { + if !args_memoizable(args, heap) { return; } if self.slots.len() <= fi { self.slots.resize_with(fi + 1, Vec::new); } let h = hash_args(args); let v = &mut self.slots[fi]; diff --git a/documentation/implementation/design.md b/documentation/implementation/design.md index c41a613..7e56ab1 100644 --- a/documentation/implementation/design.md +++ b/documentation/implementation/design.md @@ -17,7 +17,7 @@ Classes are state containers, not the primary abstraction. Inheritance, descript - **Single-pass SSA codegen**: Variables are versioned per assignment (`x` -> `x_1`, `x_2`). Control-flow joins emit explicit `Phi` opcodes resolved at runtime. - **Token-threaded dispatch**: The instruction stream is `Vec` where each `Instruction` is `(opcode: OpCode, operand: u16)`. The hot loop is a flat `match` on the opcode variant. Rust lowers it to a jump table; this is *token threading*, not direct threading (computed-goto is not available in safe Rust). - **Per-instruction inline caching**: Each binary op records the type tags of its operands. After `QUICK_THRESH = 4` stable hits the IC stores a typed `FastOp` (`AddInt`, `AddFloat`, `AddStr`, `LtFloat`, `EqStr`, `ModInt`, ...) used as a speculative fast path with a type-guard deopt that invalidates the slot on miss. 
-- **Template memoisation**: Pure user functions cache `(args) -> result` after `TPL_THRESH = 2` hits, capped at 256 entries per function, gated on no-kw call and an outer scope that hasn't been observed performing impure ops (`StoreItem`, `StoreAttr`, `Raise`, `Yield`, `Global`, `Nonlocal`, `Import`, ...). Hashing uses an FNV-like fold over raw `Val.0` bits, with a value-eq verification step. +- **Template memoisation**: Pure user functions cache `(args) -> result` after `TPL_THRESH = 2` hits, capped at 256 entries per function, gated on no-kw call, an outer scope that hasn't been observed performing impure ops (`StoreItem`, `StoreAttr`, `Raise`, `Yield`, `Global`, `Nonlocal`, `Import`, ...), and on every argument being byte-stable (mutable containers — `list`, `dict`, `set`, `Instance` — disqualify the call from caching). Hashing uses an FNV-like fold over raw `Val.0` bits, with a value-eq verification step. - **NaN-boxed values**: `Val` is a 64-bit union encoding ints (47-bit signed, inline), floats (full IEEE-754 with NaNs canonicalised), bools, None, an undef sentinel, and 28-bit heap indices in a single word. - **Mark-and-sweep GC**: Triggered when `live >= gc_threshold` or `alloc_count >= max(live/4, 4096)`. After each sweep `gc_threshold = max(live * 2, 512)`. Roots include the stack, with-stack, yields, event queue, slots and live-slot snapshots, slot templates, globals, every iterator frame's `iter_stack`, opcode-cache constants, active const pools, and function templates. diff --git a/gate-templates-on-immutable-args.txt b/gate-templates-on-immutable-args.txt new file mode 100644 index 0000000..810b822 --- /dev/null +++ b/gate-templates-on-immutable-args.txt @@ -0,0 +1,44 @@ +Branch: gate-templates-on-immutable-args +Tech debt: D7 — Templates::record cached mutable args, returning stale results when the caller mutated the same container between calls. 
+ +================================================================================ +Commit 1: skip template memoization when any arg is mutable +================================================================================ +Files: + - compiler/src/modules/vm/cache.rs + - documentation/implementation/design.md + +What was wrong: + hash_args (line 147) folded raw Val.0 bits — so two calls passing + the SAME list handle hashed identically, even if the list contents + changed between calls. args_match used eq_vals_with_heap which DID + walk contents, but only after the hash matched and only at the + point of `record()`/`lookup()`. The hit counter incremented on + identical-bits args; once hits >= TPL_THRESH, lookup returned the + cached `result` for the OLD contents. + + Concrete failure: a pure function f(xs) called as f(items), + items.append(...), f(items) would return f's prior value on the + second call if the implementation had cached. The eq verification + catches the mismatch only on certain inserts; under hash collisions + with byte-identical handles it does not. + +Fix: + Added `args_memoizable(args, heap)` which scans every arg and + returns false if any is HeapObj::List, Dict, Set, or Instance — + the four mutable containers in the heap. record() bails out early + on `!args_memoizable(...)`, so mutable-arg calls simply never enter + the cache. Immutable args (Int, Float, Bool, None, Str, Bytes, + Tuple, FrozenSet, Range, Slice, Func, Class, etc.) keep their + fast path unchanged. + + No change to lookup(): an entry never recorded cannot be looked up, + so the gate at write-time is sufficient. + +Documentation: + documentation/implementation/design.md line 20 enumerates the + memoisation gates. Added the new gate ("every argument being + byte-stable, mutable containers disqualify") to the inline list so + the spec matches the implementation. + +Tests: cargo test -> 6/6 passed. 
From 008e6c72525d67d119bd61c8aaa349e0a7b3ce13 Mon Sep 17 00:00:00 2001 From: dylan-sutton-chavez Date: Sat, 9 May 2026 16:45:28 -0600 Subject: [PATCH 20/31] reuse gc mark worklist across roots --- compiler/src/modules/vm/types/mod.rs | 21 +++++++++----- preallocate-gc-worklist.txt | 43 ++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 7 deletions(-) create mode 100644 preallocate-gc-worklist.txt diff --git a/compiler/src/modules/vm/types/mod.rs b/compiler/src/modules/vm/types/mod.rs index 3fb3614..f7a2f79 100644 --- a/compiler/src/modules/vm/types/mod.rs +++ b/compiler/src/modules/vm/types/mod.rs @@ -1,4 +1,4 @@ -use alloc::{rc::Rc, string::String, vec, vec::Vec}; +use alloc::{rc::Rc, string::String, vec::Vec}; use core::cell::RefCell; use crate::util::fx::{FxHashMap as HashMap, FxHashSet as HashSet}; @@ -351,6 +351,10 @@ pub struct HeapPool { bytes_intern: HashMap, u32>, // Cached Ellipsis slot index so `... is ...` is True (singleton parity). ellipsis_idx: Option, + /* Reused across every mark() call; capacity grows once and stays put so + GC never allocates from the same allocator it might be running short + of. Cleared, not freed, between traversals. */ + mark_worklist: Vec, } impl HeapPool { @@ -365,6 +369,7 @@ impl HeapPool { strings: HashMap::default(), bytes_intern: HashMap::default(), ellipsis_idx: None, + mark_worklist: Vec::with_capacity(64), } } @@ -410,14 +415,16 @@ impl HeapPool { pub fn mark(&mut self, v: Val) { if !v.is_heap() { return; } - let mut worklist = vec![v.as_heap()]; - while let Some(idx) = worklist.pop() { + /* Split borrow: closure needs &mut mark_worklist while we read slots. */ + let HeapPool { slots, mark_worklist, .. 
} = self; + mark_worklist.push(v.as_heap()); + while let Some(idx) = mark_worklist.pop() { let idx = idx as usize; - if self.slots[idx].marked { continue; } - self.slots[idx].marked = true; - if let Some(obj) = &self.slots[idx].obj { + if slots[idx].marked { continue; } + slots[idx].marked = true; + if let Some(obj) = &slots[idx].obj { for_each_val(obj, |val| { - if val.is_heap() { worklist.push(val.as_heap()); } + if val.is_heap() { mark_worklist.push(val.as_heap()); } }); } } diff --git a/preallocate-gc-worklist.txt b/preallocate-gc-worklist.txt new file mode 100644 index 0000000..86de548 --- /dev/null +++ b/preallocate-gc-worklist.txt @@ -0,0 +1,43 @@ +Branch: preallocate-gc-worklist +Tech debt: D33 — HeapPool::mark allocates a Vec on every call. + +================================================================================ +Commit 1: reuse the GC mark worklist across roots +================================================================================ +File: compiler/src/modules/vm/types/mod.rs + +What was wrong: + HeapPool::mark started with `let mut worklist = vec![v.as_heap()];` + on every invocation. collect() (gc.rs) calls mark() once per root — + stack, with-stack, yields, event queue, current slots, live slots, + every slot template, every globals entry, every iter frame, every + opcode-cache constant pool, every active const pool, plus + templates.mark_all. For a busy program that's hundreds of + allocations every GC cycle. + + Worse: the GC is running because the heap is under pressure. The + allocator we go to for the worklist is the same allocator we're + trying to relieve. On low-memory hosts (the WASM single-threaded + bump allocator) this can OOM the host while we're trying to free + memory. + +Fix: + Added a `mark_worklist: Vec` field to HeapPool, initialised in + HeapPool::new with a 64-slot capacity. 
mark() now reuses this Vec — + push/pop drain it cleanly back to length 0 (the loop exits when pop + returns None), so the next mark() inherits whatever capacity the + worst case grew to. The vector's allocation lives for the entire + HeapPool lifetime, not per-root. + + The closure inside mark() needed `&mut mark_worklist` while the loop + body reads `slots`, so I split the &mut self via destructure + let HeapPool { slots, mark_worklist, .. } = self; + to give the closure independent field-level borrows. + + Removed the now-unused `vec` macro import. + +Documentation: + GC docs (compiler/README.md, design.md) describe the algorithm and + roots, never the worklist representation. No doc updates required. + +Build & tests: cargo build -p edge-python -> ok (no warnings); cargo test -> 6/6 passed. From 2272d68184bbe05ed4f2d0375b75231c62b61987 Mon Sep 17 00:00:00 2001 From: dylan-sutton-chavez Date: Sat, 9 May 2026 15:21:46 -0600 Subject: [PATCH 21/31] drop unreachable dispatch_generic arm --- compiler/src/modules/vm/dispatch.rs | 3 -- remove-unreachable-dispatch-arms.txt | 41 ++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 3 deletions(-) create mode 100644 remove-unreachable-dispatch-arms.txt diff --git a/compiler/src/modules/vm/dispatch.rs b/compiler/src/modules/vm/dispatch.rs index 3f0a81f..cdb10bc 100644 --- a/compiler/src/modules/vm/dispatch.rs +++ b/compiler/src/modules/vm/dispatch.rs @@ -666,9 +666,6 @@ impl<'a> VM<'a> { // Emitted by `break` inside a for-loop to drop the abandoned // iterator so the surrounding for-iter reads from its own iter. 
OpCode::PopIter => { self.iter_stack.pop(); } - OpCode::MakeClass | OpCode::StoreAttr => { - return Err(cold_runtime("MakeClass/StoreAttr must be in main dispatch")); - } _ => return Err(cold_runtime("unexpected opcode in generic dispatch")), } Ok(()) diff --git a/remove-unreachable-dispatch-arms.txt b/remove-unreachable-dispatch-arms.txt new file mode 100644 index 0000000..9bca9d8 --- /dev/null +++ b/remove-unreachable-dispatch-arms.txt @@ -0,0 +1,41 @@ +Branch: remove-unreachable-dispatch-arms +Tech debt: D42 — dead MakeClass/StoreAttr arm in dispatch_generic. + +================================================================================ +Commit 1: drop unreachable dispatch_generic arm +================================================================================ +File: compiler/src/modules/vm/dispatch.rs + +What was wrong: + dispatch_generic (line 576) is the fallback called from the main + dispatch via: + other => self.dispatch_generic(other, op, slots)? + It only ever sees opcodes the main match did NOT handle. + + The main match has explicit arms for OpCode::MakeClass at line 483 + and OpCode::StoreAttr at line 515, so neither variant can reach + dispatch_generic. Yet dispatch_generic carried: + + OpCode::MakeClass | OpCode::StoreAttr => + return Err(cold_runtime("MakeClass/StoreAttr must be in main dispatch")); + + Defensive in intent, dead in practice. Worse, it gives a false sense + of exhaustiveness: the catch-all `_` immediately after means the + presence of explicit arms here doesn't add real coverage. + +Fix: + Removed the unreachable arm. The catch-all + _ => return Err(cold_runtime("unexpected opcode in generic dispatch")) + remains as the runtime safety net. + + The follow-up suggestion to replace `_` with an explicit list of + variants legitimately reaching dispatch_generic — so adding a new + OpCode breaks the build instead of being silently dropped to runtime + error — is structural and is left for a separate refactor. 
+ +Documentation: + README and design.md mention MakeClass/StoreAttr only at the language + level (decorators, impurity tracking), not at the dispatch level. No + doc changes required. + +Tests: cargo test -> 6/6 passed. From 674a57e5f37e5a4c332d26d39abf63daa6ba93da Mon Sep 17 00:00:00 2001 From: dylan-sutton-chavez Date: Sat, 9 May 2026 15:26:14 -0600 Subject: [PATCH 22/31] drop FmtBuf for plain string --- compiler/src/util/fstr.rs | 30 +++++++---------------- replace-fmtbuf-with-string.txt | 44 ++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 22 deletions(-) create mode 100644 replace-fmtbuf-with-string.txt diff --git a/compiler/src/util/fstr.rs b/compiler/src/util/fstr.rs index 813fd29..ac80d14 100644 --- a/compiler/src/util/fstr.rs +++ b/compiler/src/util/fstr.rs @@ -22,29 +22,15 @@ pub fn format_f64(f: f64) -> alloc::string::String { format_general(f) } -/* 32-byte stack buffer fits any f64 default format. */ +/* f64 default format fits in ~24 bytes for most values; preallocate 32 to avoid + regrowth on the common case. Using String over a stack buffer trades a tiny + allocation for safety: no silent truncation, no from_utf8_unchecked. 
*/ fn format_general(f: f64) -> alloc::string::String { - let mut buf = FmtBuf::new(); - let _ = core::fmt::write(&mut buf, core::format_args!("{}", f)); - alloc::string::String::from(buf.as_str()) -} - -struct FmtBuf { buf: [u8; 32], len: usize } -impl FmtBuf { - fn new() -> Self { Self { buf: [0u8; 32], len: 0 } } - fn as_str(&self) -> &str { - unsafe { core::str::from_utf8_unchecked(&self.buf[..self.len]) } - } -} -impl core::fmt::Write for FmtBuf { - fn write_str(&mut self, s: &str) -> core::fmt::Result { - let bytes = s.as_bytes(); - let end = (self.len + bytes.len()).min(self.buf.len()); - let n = end - self.len; - self.buf[self.len..end].copy_from_slice(&bytes[..n]); - self.len = end; - Ok(()) - } + use core::fmt::Write; + let mut out = alloc::string::String::with_capacity(32); + /* core::fmt::Write::write_fmt is infallible for a String. */ + let _ = write!(&mut out, "{}", f); + out } #[macro_export] diff --git a/replace-fmtbuf-with-string.txt b/replace-fmtbuf-with-string.txt new file mode 100644 index 0000000..132ecb4 --- /dev/null +++ b/replace-fmtbuf-with-string.txt @@ -0,0 +1,44 @@ +Branch: replace-fmtbuf-with-string +Tech debt: D44 — FmtBuf truncates silently and uses from_utf8_unchecked. + +================================================================================ +Commit 1: drop FmtBuf in favour of plain String +================================================================================ +File: compiler/src/util/fstr.rs + +What was wrong: + format_general formatted floats into a 32-byte stack buffer with two + hazards: + + 1. write_str clamped to `(self.len + bytes.len()).min(self.buf.len())`, + silently dropping any overflow. f64::Display normally fits, but + edge-case paths (subnormals near MIN_POSITIVE, certain + "{:?}"-via-Display future changes) could overflow without surfacing + an error. + 2. as_str() called core::str::from_utf8_unchecked on the partially + filled buffer.
The truncation in #1 is byte-aligned, not UTF-8 + aware, so a future change that piped a multi-byte char through + here would produce UB at the seam. + + Both hazards exist purely as a micro-optimisation: avoid the + String allocation. f64 formatting already runs the Rust fmt machinery + (which itself allocates indirectly via the float printing + implementation), so saving one tiny String alloc was never the + bottleneck. + +Fix: + Removed the FmtBuf struct entirely. format_general now uses + let mut out = alloc::string::String::with_capacity(32); + let _ = write!(&mut out, "{}", f); + out + with `use core::fmt::Write` brought into scope locally. The 32-byte + preallocation matches the previous stack budget so the allocator hits + zero realloc on the common case. write! into a String is infallible, + so the `let _ =` is preserved (the Result is purely structural). + + No more silent truncation, no more from_utf8_unchecked. + +Documentation: + No .md docs reference FmtBuf or the format_general internals. + +Tests: cargo test -> 6/6 passed. From e0b3dcaa13084105723ef7fe7d29defb460d7e20 Mon Sep 17 00:00:00 2001 From: dylan-sutton-chavez Date: Sat, 9 May 2026 15:19:17 -0600 Subject: [PATCH 23/31] drop unused push macro --- collapse-push-macro-into-s.txt | 35 ++++++++++++++++++++++++++ compiler/src/util/fstr.rs | 10 -------- documentation/implementation/design.md | 2 +- 3 files changed, 36 insertions(+), 11 deletions(-) create mode 100644 collapse-push-macro-into-s.txt diff --git a/collapse-push-macro-into-s.txt b/collapse-push-macro-into-s.txt new file mode 100644 index 0000000..cea8a84 --- /dev/null +++ b/collapse-push-macro-into-s.txt @@ -0,0 +1,35 @@ +Branch: collapse-push-macro-into-s +Tech debt: D43 — redundant push! macro alongside s!. 
+ +================================================================================ +Commit 1: drop unused push macro +================================================================================ +Files: + - compiler/src/modules/fstr.rs + - documentation/implementation/design.md + +What was wrong: + fstr.rs exported two #[macro_export] macros that covered the same + domain (no-alloc string formatting): + - push!(s, ...) — single fragment, six explicit type arms. + - s!(...) — multiple fragments via a recursive @b helper, same + type matrix. + Any new fragment kind (e.g. hex, dec_groups) had to be added in two + places, with mechanical drift risk. push! also lacked s!'s `cap:` form + and could not be chained. + + Workspace-wide grep for `push!` returned zero call sites outside the + declaration itself. The macro was simply dead surface. + +Fix: + Removed the entire push! definition. s! remains the single string + builder. No call sites had to change. + +Documentation: + documentation/implementation/design.md line 92 listed + `# numeric formatter + s!/push!/err! string macros` for fstr.rs. + Updated to drop `push!/` so the inventory matches the file. + + compiler/README.md only lists the file path; no macro names to drift. + +Tests: cargo test -> 6/6 passed. diff --git a/compiler/src/util/fstr.rs b/compiler/src/util/fstr.rs index ac80d14..8032450 100644 --- a/compiler/src/util/fstr.rs +++ b/compiler/src/util/fstr.rs @@ -33,16 +33,6 @@ fn format_general(f: f64) -> alloc::string::String { out } -#[macro_export] -macro_rules! 
push { - ($s:ident, $v:literal) => { $s.push_str($v); }; - ($s:ident, str $v:expr) => { $s.push_str($v); }; - ($s:ident, int $v:expr) => {{ let mut b = itoa::Buffer::new(); $s.push_str(b.format($v)); }}; - ($s:ident, float $v:expr) => { $s.push_str(&$crate::util::fstr::format_f64($v)); }; - ($s:ident, char $v:expr) => { $s.push($v); }; - ($s:ident, bool $v:expr) => { $s.push_str(if $v { "true" } else { "false" }); }; -} - #[macro_export] macro_rules! s { (@b $s:ident;) => {}; diff --git a/documentation/implementation/design.md b/documentation/implementation/design.md index 7e56ab1..fea871a 100644 --- a/documentation/implementation/design.md +++ b/documentation/implementation/design.md @@ -89,7 +89,7 @@ compiler/src/ │ ├── resolver.rs # walk-up packages.json + native bridge closure │ └── errors.rs ├── util/ # Internal helpers shared across the compiler (not stdlib) - │ ├── fstr.rs # numeric formatter + s!/push!/err! string macros + │ ├── fstr.rs # numeric formatter + s!/err! string macros │ ├── fx.rs # FxHasher + per-map seeded FxBuildHasher │ └── sha256.rs # in-tree FIPS 180-4 SHA-256 (used by integrity) └── modules/ From e4668f8c662ef0151ef7d9637f42786553ea27f8 Mon Sep 17 00:00:00 2001 From: dylan-sutton-chavez Date: Sat, 9 May 2026 14:20:55 -0600 Subject: [PATCH 24/31] use fixed seed for fx hasher --- compiler/src/util/fx.rs | 23 ++++------------ deterministic-fx-hasher-seed.txt | 37 ++++++++++++++++++++++++++ documentation/implementation/design.md | 2 +- 3 files changed, 43 insertions(+), 19 deletions(-) create mode 100644 deterministic-fx-hasher-seed.txt diff --git a/compiler/src/util/fx.rs b/compiler/src/util/fx.rs index 3dc5bd8..8663b71 100644 --- a/compiler/src/util/fx.rs +++ b/compiler/src/util/fx.rs @@ -1,10 +1,12 @@ use core::hash::{BuildHasher, Hasher}; -use core::sync::atomic::{AtomicUsize, Ordering}; /* FxHash multiplier from rustc-hash. 
*/ const K: u64 = 0x517cc1b727220a95; -static SEED_COUNTER: AtomicUsize = AtomicUsize::new(1); +/* Fixed seed: deterministic across runs so map iteration order is reproducible. + The compiler is single-threaded in trusted contexts (host-driven WASM), so + per-instance randomisation buys nothing here and breaks golden tests. */ +const FIXED_SEED: u64 = 0x9e3779b97f4a7c15; #[derive(Clone, Default)] pub struct FxHasher(u64); @@ -30,23 +32,8 @@ impl Hasher for FxHasher { pub struct FxBuildHasher(u64); impl FxBuildHasher { - /* Atomic counter seed, avalanche-mixed to decorrelate sequential values. */ #[inline] - pub fn new() -> Self { - let raw = SEED_COUNTER.fetch_add(1, Ordering::Relaxed) as u64; - Self(murmur3_fmix64(raw)) - } -} - -/* MurmurHash3 finalizer: spreads a 1-bit difference across all 64 bits. */ -#[inline] -fn murmur3_fmix64(mut h: u64) -> u64 { - h ^= h >> 33; - h = h.wrapping_mul(0xff51afd7ed558ccd); - h ^= h >> 33; - h = h.wrapping_mul(0xc4ceb9fe1a85ec53); - h ^= h >> 33; - h + pub fn new() -> Self { Self(FIXED_SEED) } } impl Default for FxBuildHasher { diff --git a/deterministic-fx-hasher-seed.txt b/deterministic-fx-hasher-seed.txt new file mode 100644 index 0000000..7c4f77e --- /dev/null +++ b/deterministic-fx-hasher-seed.txt @@ -0,0 +1,37 @@ +Branch: deterministic-fx-hasher-seed +Tech debt: D48 — non-deterministic FxBuildHasher seed. + +================================================================================ +Commit 1: use fixed seed for FxBuildHasher +================================================================================ +Files: + - compiler/src/modules/fx.rs + - documentation/implementation/design.md + +What was wrong: + fx.rs used a global atomic counter (SEED_COUNTER) plus the MurmurHash3 + finalizer to assign each FxBuildHasher a per-instance seed. 
In a host- + driven WASM compiler that is single-threaded and runs only trusted input, + the per-instance randomisation buys nothing — there is no DoS attacker + whose collisions need to be foiled. What it does cost: + + - Map iteration order varies across runs of the same input. Diagnostic + output that lists names from a FxHashMap (e.g. "X imports never used: + [...]") differs between runs. + - Bytecode artifacts that serialise any hash-ordered table cannot be + compared byte-for-byte across builds. + - Golden tests that assert exact stdout become flaky. + +Fix: + Replaced the atomic counter and murmur3 finaliser with a single fixed + constant FIXED_SEED (0x9e3779b97f4a7c15, the golden ratio in u64 — a + conventional choice for a non-zero, non-trivial seed). FxBuildHasher::new() + is now Self(FIXED_SEED). Removed the unused AtomicUsize, Ordering import, + SEED_COUNTER static, and murmur3_fmix64 helper. + +Documentation: + documentation/implementation/design.md line 93 listed fx.rs as + "per-map seeded FxBuildHasher", which directly contradicts the fix. + Updated to "fixed-seed FxBuildHasher (deterministic)". + +Tests: cargo test -> 6/6 passed. diff --git a/documentation/implementation/design.md b/documentation/implementation/design.md index fea871a..b4d3e4d 100644 --- a/documentation/implementation/design.md +++ b/documentation/implementation/design.md @@ -90,7 +90,7 @@ compiler/src/ │ └── errors.rs ├── util/ # Internal helpers shared across the compiler (not stdlib) │ ├── fstr.rs # numeric formatter + s!/err! 
string macros - │ ├── fx.rs # FxHasher + per-map seeded FxBuildHasher + │ ├── fx.rs # FxHasher + fixed-seed FxBuildHasher (deterministic) │ └── sha256.rs # in-tree FIPS 180-4 SHA-256 (used by integrity) └── modules/ ├── lexer/ From 5ab64b68c0d78cfeb4c115b78a3828614b00220a Mon Sep 17 00:00:00 2001 From: dylan-sutton-chavez Date: Sat, 9 May 2026 16:41:48 -0600 Subject: [PATCH 25/31] mark Val from_raw unsafe --- compiler/src/modules/vm/types/mod.rs | 8 ++++- mark-val-from-raw-unsafe.txt | 46 ++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 1 deletion(-) create mode 100644 mark-val-from-raw-unsafe.txt diff --git a/compiler/src/modules/vm/types/mod.rs b/compiler/src/modules/vm/types/mod.rs index f7a2f79..7796484 100644 --- a/compiler/src/modules/vm/types/mod.rs +++ b/compiler/src/modules/vm/types/mod.rs @@ -136,7 +136,13 @@ impl Val { #[inline(always)] pub fn as_float(&self) -> f64 { f64::from_bits(self.0) } /* Public accessors for wire-format marshalling (FFI / WASM loader / SDK). */ #[inline(always)] pub fn raw(&self) -> u64 { self.0 } - #[inline(always)] pub fn from_raw(u: u64) -> Self { Self(u) } + /* # Safety + Callers must guarantee `u` was produced by `Val::raw()` on a Val whose + referenced heap slot is still live in *this* HeapPool. Constructing a + Val with an arbitrary u64 — or one obtained from a different VM + instance — violates the GC invariant and will cause `HeapPool::get` + to panic on first dereference. */ + #[inline(always)] pub unsafe fn from_raw(u: u64) -> Self { Self(u) } #[inline(always)] pub fn as_int(&self) -> i64 { let raw = (self.0 & INT_PAYLOAD_MASK) as i64; (raw << 16) >> 16 diff --git a/mark-val-from-raw-unsafe.txt b/mark-val-from-raw-unsafe.txt new file mode 100644 index 0000000..c01277b --- /dev/null +++ b/mark-val-from-raw-unsafe.txt @@ -0,0 +1,46 @@ +Branch: mark-val-from-raw-unsafe +Tech debt: D11 (partial) — Val::from_raw was a safe public constructor for an unsafe contract. 
+ +================================================================================ +Commit 1: mark Val::from_raw unsafe and document GC contract +================================================================================ +File: compiler/src/modules/vm/types/mod.rs + +What was wrong: + Line 142 declared: + + pub fn from_raw(u: u64) -> Self { Self(u) } + + This is the public wire-format constructor for a Val. It accepted ANY + u64, including values that decode to Heap with an arbitrary 28-bit + index. HeapPool::get and HeapPool::get_mut then index into + `slots[v.as_heap() as usize]` and call `.expect(...)` if the slot is + free. The combined effect: an external caller passing a u64 they made + up panics the host process. + + This is the second half of the broader D5 problem (HeapPool::get + panics on dangling references). The structural fix for D5 — return + Result instead of panicking — is a much larger refactor across every + call site. Marking the constructor unsafe at least pushes the + responsibility onto the wire-format caller, where it belongs, and + matches the documented intent that this Val came from `raw()` on the + same HeapPool. + +Fix: + Changed the signature to: + + pub unsafe fn from_raw(u: u64) -> Self { Self(u) } + + Added a /* # Safety */ doc block describing the invariant: the u64 + must have come from Val::raw() on a Val whose heap slot is still + live in this HeapPool. No internal Rust call sites use Val::from_raw, + so the workspace builds and tests cleanly without further changes. + + External consumers (FFI / WASM loaders / future SDKs) now have to + wrap the call in `unsafe { ... }`, which is the correct signal — + they were already taking on this responsibility implicitly. + +Documentation: + No .md docs reference Val::from_raw directly. + +Tests: cargo test -> 6/6 passed. 
From c4101436f32f8c639417b4cde1336e207f17e7c1 Mon Sep 17 00:00:00 2001 From: dylan-sutton-chavez Date: Sat, 9 May 2026 20:50:34 -0600 Subject: [PATCH 26/31] remove expect and unwrap on ffi dispatch paths --- compiler/src/main/abi_bridge.rs | 15 ++++++++--- remove-ffi-boundary-panics.txt | 47 +++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 4 deletions(-) create mode 100644 remove-ffi-boundary-panics.txt diff --git a/compiler/src/main/abi_bridge.rs b/compiler/src/main/abi_bridge.rs index 3c34499..516f317 100644 --- a/compiler/src/main/abi_bridge.rs +++ b/compiler/src/main/abi_bridge.rs @@ -48,7 +48,10 @@ fn dispatch_call(recv_h: u32, name: &str, args: Vec) -> Result if vm.stack.len() != stack_before + 1 { return Err(VmErr::Runtime("edge_op call: method left no result")); } - Ok(vm.stack.pop().unwrap()) + /* The length check above guarantees a value is present; ok_or + keeps the FFI boundary panic-free if a future change drops + the invariant. */ + vm.stack.pop().ok_or(VmErr::Runtime("edge_op call: stack drained mid-dispatch")) }) } @@ -125,7 +128,7 @@ fn dispatch_get_item(recv_h: u32, args: &[Val]) -> Result { if vm.stack.len() != stack_before + 1 { return Err(VmErr::Runtime("edge_op get_item: get_item left no result")); } - Ok(vm.stack.pop().unwrap()) + vm.stack.pop().ok_or(VmErr::Runtime("edge_op get_item: stack drained mid-dispatch")) }) } @@ -302,8 +305,12 @@ pub unsafe extern "C" fn host_edge_take_error(out_kind: *mut u32, dst: *mut u8, None => return -1, }; if len > dst_max as usize { return -(len as i32); } - // Buffer fits — drain and copy. - let (_, msg) = stash.take().expect("peek returned Some"); + // Buffer fits — drain and copy. 
Treat a None on take() as "race lost the + // peek/take window" and surface it as no-pending-error rather than panic + // across the FFI boundary (the .expect() previously here violated + // "panics never cross FFI"; in single-threaded WASM this is unreachable + // today but we don't want a future scheduler change to weaponise it). + let Some((_, msg)) = stash.take() else { return -1; }; let bytes = msg.as_bytes(); unsafe { *out_kind = kind; diff --git a/remove-ffi-boundary-panics.txt b/remove-ffi-boundary-panics.txt new file mode 100644 index 0000000..178a09b --- /dev/null +++ b/remove-ffi-boundary-panics.txt @@ -0,0 +1,47 @@ +Branch: remove-ffi-boundary-panics +Tech debt: D26 (companion to stash-panic-before-trap) — expect/unwrap calls in abi_bridge.rs let panics cross the WASM FFI boundary as opaque traps. + +================================================================================ +Commit 1: replace expect/unwrap on FFI dispatch paths with propagated VmErr +================================================================================ +File: compiler/src/main/abi_bridge.rs + +What was wrong: + Three sites violated the "panics never cross FFI" rule: + + - host_edge_take_error (line 305): + let (_, msg) = stash.take().expect("peek returned Some"); + Single-threaded WASM makes this unreachable today, but the + `expect` is a foot-gun: any future scheduler change that lets + another path `take()` between the peek and the take above would + panic and trap the module without a message. + + - dispatch_call (line 50): `Ok(vm.stack.pop().unwrap())` + - dispatch_get_item (line 127): `Ok(vm.stack.pop().unwrap())` + Both protected by an `if vm.stack.len() != stack_before + 1` + invariant, but the `unwrap` is a second invariant — if the check + above is ever weakened (e.g. someone bumps the +1 to +N), the + unwrap goes from "redundant" to "panic" silently. 
+ +Fix: + - host_edge_take_error: replaced `expect` with a `let-else` + pattern that returns -1 (no pending error) on the unreachable + branch. Documented in a `//` comment why the prior `.expect` + was a latent FFI hazard. + - dispatch_call / dispatch_get_item: replaced `.unwrap()` with + `.ok_or(VmErr::Runtime("...stack drained mid-dispatch"))`. + Behaviour for the happy path is byte-identical; the previously- + unreachable branch now propagates a typed error instead of + panicking the host process. + + Combined with stash-panic-before-trap (already merged, captures + panics that DO occur), the FFI boundary now never traps with a + bare unreachable except from genuinely unrecoverable Rust panics + outside our reach (e.g. allocator OOM during the format). + +Documentation: + No .md docs reference the old expect/unwrap behaviour. wasm-abi.md + describes the public ABI contract, which is unchanged: the same + status codes carry the same meanings. + +Build & tests: cargo build -p edge-python -> ok; cargo test -> 6/6 passed. From 755b448a7b73f76903d8d446170daadc309a5dac Mon Sep 17 00:00:00 2001 From: dylan-sutton-chavez Date: Sat, 9 May 2026 14:24:17 -0600 Subject: [PATCH 27/31] validate utf-8 on host input buffer --- compiler/src/main/exports.rs | 7 +++++-- validate-utf8-on-host-input.txt | 36 +++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 2 deletions(-) create mode 100644 validate-utf8-on-host-input.txt diff --git a/compiler/src/main/exports.rs b/compiler/src/main/exports.rs index 8a3b4c3..089a22e 100644 --- a/compiler/src/main/exports.rs +++ b/compiler/src/main/exports.rs @@ -133,9 +133,12 @@ pub unsafe extern "C" fn run(len: usize) -> usize { vm.strict_input = true; let inp_len = unsafe { INP_LEN }; if inp_len > 0 { - let inp = unsafe { core::str::from_utf8_unchecked( + /* Host-supplied buffer; validate UTF-8 to keep the FFI boundary safe. + Invalid bytes degrade to an empty input rather than UB. 
*/ + let bytes = unsafe { core::slice::from_raw_parts(core::ptr::addr_of!(INP) as *const u8, inp_len) - )}; + }; + let inp = core::str::from_utf8(bytes).unwrap_or(""); vm.input_buffer = inp.split('\n').map(alloc::string::String::from).collect(); unsafe { INP_LEN = 0; } } diff --git a/validate-utf8-on-host-input.txt b/validate-utf8-on-host-input.txt new file mode 100644 index 0000000..2d5743a --- /dev/null +++ b/validate-utf8-on-host-input.txt @@ -0,0 +1,36 @@ +Branch: validate-utf8-on-host-input +Tech debt: D3 — from_utf8_unchecked on host-controlled INP buffer is UB if the host writes non-UTF-8 bytes. + +================================================================================ +Commit 1: validate utf-8 on host input buffer +================================================================================ +File: compiler/src/main/exports.rs + +What was wrong: + Lines 117-124 read INP (a host-written byte buffer) and constructed a + &str via core::str::from_utf8_unchecked. Nothing in the wasm-abi spec + forces the host to validate UTF-8 before writing INP, so a buggy or + hostile host that writes a non-UTF-8 byte sequence triggered immediate + undefined behaviour: every downstream &str method (split, chars, len) + becomes UB on a non-UTF-8 buffer, even before the parse output is ever + inspected. + + This is a pure boundary-validation gap. The cost of from_utf8 vs + from_utf8_unchecked is one linear scan over the buffer — negligible + next to compile + run. + +Fix: + Replaced the unchecked construction with core::str::from_utf8(bytes) + and degraded invalid input to an empty &str via .unwrap_or(""). + Behaviour for valid UTF-8 is identical; invalid bytes now produce an + empty input_buffer instead of UB. Added a /* ... */ comment in the + lexer/parser style explaining the boundary intent. + + Companion debt D2 (the *mut VM<'static> lifetime laundering five lines + below) is structural and lives on its own branch; not touched here. 
+ +Documentation: + reference/builtins.md only mentions input() at the language level; no + UB or boundary discussion to update. + +Tests: cargo test -> 6/6 passed. From 3aaa0d56131f7e92d4bf2212c1d145b779f1b178 Mon Sep 17 00:00:00 2001 From: dylan-sutton-chavez Date: Sat, 9 May 2026 15:24:39 -0600 Subject: [PATCH 28/31] release handle via raii in Handle len --- edge-pdk/src/lib.rs | 7 ++++--- handle-len-via-raii.txt | 46 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 3 deletions(-) create mode 100644 handle-len-via-raii.txt diff --git a/edge-pdk/src/lib.rs b/edge-pdk/src/lib.rs index 7cde6bf..7c1a3aa 100644 --- a/edge-pdk/src/lib.rs +++ b/edge-pdk/src/lib.rs @@ -525,8 +525,9 @@ impl Handle { ) }; if r != 0 { return Err(last_error()); } - let v = i64::from_handle(out)?; - unsafe { edge_release(out); } - Ok(v) + /* Wrap into a Handle so Drop releases on every exit path, including + the `?` from a future from_handle that fails between decode and release. */ + let h = Handle::from_raw(out); + i64::from_handle(h.raw()) } } diff --git a/handle-len-via-raii.txt b/handle-len-via-raii.txt new file mode 100644 index 0000000..e49514c --- /dev/null +++ b/handle-len-via-raii.txt @@ -0,0 +1,46 @@ +Branch: handle-len-via-raii +Tech debt: D57 — Handle::len() decodes then manually edge_releases, leaking on any future ? between the two calls. + +================================================================================ +Commit 1: route handle release through Drop in Handle::len +================================================================================ +File: edge-pdk/src/lib.rs + +What was wrong: + Handle::len had this pattern at lines 459-461: + + let v = i64::from_handle(out)?; + unsafe { edge_release(out); } + Ok(v) + + This is correct today only because i64::from_handle currently cannot + short-circuit between `out` and the manual release. But: + + 1. 
The rest of the impl block uses Handle::from_raw(out) which lets + Drop run edge_release automatically. len() is the only outlier. + 2. If a maintainer adds another `?`-returning step between decode and + release — e.g. validating range, attaching a span — the handle + leaks silently. + 3. The `unsafe` block at the call site exposes raw FFI in API code + that has a perfectly good RAII alternative one struct away. + +Fix: + Replaced the manual decode+release with: + + let h = Handle::from_raw(out); + i64::from_handle(h.raw()) + + Now Drop owns the release on every exit path (success, ?-propagation, + panic). No `unsafe`. The behaviour is identical for all current + callers. + + A short /* ... */ comment in the lexer/parser style records *why* the + rewrite — a future ? would have leaked. + +Documentation: + reference/wasm-abi.md describes edge_release at the ABI level (owned + vs. borrowed handles, manual release rules in raw FFI examples). It + does not describe the high-level Handle wrapper's RAII contract, so + no doc changes are required. + +Build & tests: cargo build -p edge-pdk -> ok; cargo test -> 6/6 passed. From 150447f354429bb89971f6a6db7a54172d8494e8 Mon Sep 17 00:00:00 2001 From: dylan-sutton-chavez Date: Sat, 9 May 2026 16:49:49 -0600 Subject: [PATCH 29/31] stash panic message before trap --- compiler/src/main/mod.rs | 16 ++++++++++++- stash-panic-before-trap.txt | 48 +++++++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+), 1 deletion(-) create mode 100644 stash-panic-before-trap.txt diff --git a/compiler/src/main/mod.rs b/compiler/src/main/mod.rs index 42872c9..a8dd59e 100644 --- a/compiler/src/main/mod.rs +++ b/compiler/src/main/mod.rs @@ -37,8 +37,22 @@ A ~3,000-alloc perceptron run pays ~600 ms; bumping cuts it to ~50 grows. 
#[global_allocator] static A: AssumeSingleThreaded = unsafe { AssumeSingleThreaded::new(LeakingAllocator::new()) }; +/* Best-effort panic-to-stash: the host's edge_take_error then sees a typed + message instead of an opaque WASM trap. If the format allocation itself + re-enters this handler we fall through to unreachable(); the host trap + behaviour is unchanged from the previous bare implementation. */ #[panic_handler] -fn panic(_: &core::panic::PanicInfo) -> ! { core::arch::wasm32::unreachable() } +fn panic(info: &core::panic::PanicInfo) -> ! { + let msg = alloc::format!("internal panic: {}", info.message()); + unsafe { + let p = core::ptr::addr_of_mut!(ERROR_STASH); + if (*p).is_none() { *p = Some(ErrorStash::new()); } + if let Some(stash) = (*p).as_mut() { + stash.set(crate::abi::ErrorKind::Runtime as u32, msg); + } + } + core::arch::wasm32::unreachable() +} pub(super) const SZ: usize = 1 << 20; #[allow(non_upper_case_globals)] diff --git a/stash-panic-before-trap.txt b/stash-panic-before-trap.txt new file mode 100644 index 0000000..468e4f2 --- /dev/null +++ b/stash-panic-before-trap.txt @@ -0,0 +1,48 @@ +Branch: stash-panic-before-trap +Tech debt: D26 (panic_handler portion) — bare wasm32::unreachable() trap discarded the panic message. + +================================================================================ +Commit 1: stash panic message before trapping +================================================================================ +File: compiler/src/main/mod.rs + +What was wrong: + panic_handler at line 41 read: + + fn panic(_: &core::panic::PanicInfo) -> ! { core::arch::wasm32::unreachable() } + + When *any* code on the WASM side panicked — a hand-written invariant, + an underflow, a malformed bytecode reaching an unreachable catch — the + module trapped immediately. The host's edge_take_error returned -1 ("no + message"), so the user saw "the WASM module crashed" with zero context. 
+ Reproducing intermittent panics from production became a guessing game. + +Fix: + Captured `info.message()` into a String, stashed it in ERROR_STASH with + ErrorKind::Runtime (kind=2), then trapped. The host's edge_take_error + now drains it and returns a typed message like "internal panic: index + out of bounds: the len is 4 but the index is 7". + + Defensive considerations: + - alloc::format! itself can panic (allocator OOM). If that happens, + panic_handler re-enters; the stash never gets set; we fall through + to the original unreachable() trap. Net behaviour for the host is + identical to the previous code, so the fix is strictly additive. + - ERROR_STASH may not have been initialised yet (panic during very + early startup). The handler explicitly initialises it on first use, + matching the lazy pattern the rest of mod.rs uses for static state. + + A /* ... */ doc comment in the lexer/parser style records both the + intent and the recursion-on-OOM caveat. + + This addresses the panic_handler half of D26. The companion concern + (replace .expect()/unwrap() in abi_bridge.rs with propagated errors) + is left for a separate branch. + +Documentation: + reference/wasm-abi.md and reference/writing-modules.md include + panic_handler snippets — these are templates for *plugin authors* + writing their own .wasm modules, not for the host's compiler.wasm. + No doc updates required. + +Build & tests: cargo build -p edge-python -> ok; cargo test -> 6/6 passed. From 3b875b8cde5635c2a88171980a3a375f03c5a0e0 Mon Sep 17 00:00:00 2001 From: dylan-sutton-chavez Date: Sat, 9 May 2026 23:15:19 -0600 Subject: [PATCH 30/31] docs(vm): add safety docstring for an unsafe function to address linter. 
--- compiler/src/modules/vm/types/mod.rs | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/compiler/src/modules/vm/types/mod.rs b/compiler/src/modules/vm/types/mod.rs index 7796484..49d4977 100644 --- a/compiler/src/modules/vm/types/mod.rs +++ b/compiler/src/modules/vm/types/mod.rs @@ -136,12 +136,9 @@ impl Val { #[inline(always)] pub fn as_float(&self) -> f64 { f64::from_bits(self.0) } /* Public accessors for wire-format marshalling (FFI / WASM loader / SDK). */ #[inline(always)] pub fn raw(&self) -> u64 { self.0 } - /* # Safety - Callers must guarantee `u` was produced by `Val::raw()` on a Val whose - referenced heap slot is still live in *this* HeapPool. Constructing a - Val with an arbitrary u64 — or one obtained from a different VM - instance — violates the GC invariant and will cause `HeapPool::get` - to panic on first dereference. */ + /** # Safety + * `u` must come from `Val::raw()` on a live heap slot in the same VM. + * */ #[inline(always)] pub unsafe fn from_raw(u: u64) -> Self { Self(u) } #[inline(always)] pub fn as_int(&self) -> i64 { let raw = (self.0 & INT_PAYLOAD_MASK) as i64; From c9d2ba33b278aee1d1d67fd58cdda2190fc1efa4 Mon Sep 17 00:00:00 2001 From: dylan-sutton-chavez Date: Sat, 9 May 2026 23:27:23 -0600 Subject: [PATCH 31/31] refactor(reports): remove commit reports from the current branch. 
--- add-bell-backspace-form-vtab-escapes.txt | 30 ---------- add-edge-pdk-prelude.txt | 52 ---------------- add-pdk-module-bootstrap-macro.txt | 58 ------------------ add-wasm-free-export.txt | 40 ------------- centralize-nanbox-constants.txt | 75 ------------------------ centralize-src-buffer-read.txt | 47 --------------- collapse-push-macro-into-s.txt | 35 ----------- deterministic-fx-hasher-seed.txt | 37 ------------ distinguish-overflow-from-type-miss.txt | 62 -------------------- export-pdk-abi-version.txt | 63 -------------------- fix-host-edge-encode-doc.txt | 32 ---------- fix-plugin-fn-macro-error-message.txt | 27 --------- fix-raw-string-prefix-detection.txt | 32 ---------- gate-templates-on-immutable-args.txt | 44 -------------- generalize-int-overflow-message.txt | 44 -------------- handle-len-via-raii.txt | 46 --------------- hide-pdk-internals-submodule.txt | 62 -------------------- mark-val-from-raw-unsafe.txt | 46 --------------- move-utils-to-util-mod.txt | 53 ----------------- preallocate-gc-worklist.txt | 43 -------------- remove-cosmetic-package-aliases.txt | 28 --------- remove-ffi-boundary-panics.txt | 47 --------------- remove-unreachable-dispatch-arms.txt | 41 ------------- rename-macros-to-edge-pdk-macros.txt | 50 ---------------- replace-fmtbuf-with-string.txt | 44 -------------- stash-panic-before-trap.txt | 48 --------------- tighten-resolver-internals.txt | 72 ----------------------- validate-utf8-on-host-input.txt | 36 ------------ workspace-dependencies-table.txt | 51 ---------------- 29 files changed, 1345 deletions(-) delete mode 100644 add-bell-backspace-form-vtab-escapes.txt delete mode 100644 add-edge-pdk-prelude.txt delete mode 100644 add-pdk-module-bootstrap-macro.txt delete mode 100644 add-wasm-free-export.txt delete mode 100644 centralize-nanbox-constants.txt delete mode 100644 centralize-src-buffer-read.txt delete mode 100644 collapse-push-macro-into-s.txt delete mode 100644 deterministic-fx-hasher-seed.txt delete mode 
100644 distinguish-overflow-from-type-miss.txt delete mode 100644 export-pdk-abi-version.txt delete mode 100644 fix-host-edge-encode-doc.txt delete mode 100644 fix-plugin-fn-macro-error-message.txt delete mode 100644 fix-raw-string-prefix-detection.txt delete mode 100644 gate-templates-on-immutable-args.txt delete mode 100644 generalize-int-overflow-message.txt delete mode 100644 handle-len-via-raii.txt delete mode 100644 hide-pdk-internals-submodule.txt delete mode 100644 mark-val-from-raw-unsafe.txt delete mode 100644 move-utils-to-util-mod.txt delete mode 100644 preallocate-gc-worklist.txt delete mode 100644 remove-cosmetic-package-aliases.txt delete mode 100644 remove-ffi-boundary-panics.txt delete mode 100644 remove-unreachable-dispatch-arms.txt delete mode 100644 rename-macros-to-edge-pdk-macros.txt delete mode 100644 replace-fmtbuf-with-string.txt delete mode 100644 stash-panic-before-trap.txt delete mode 100644 tighten-resolver-internals.txt delete mode 100644 validate-utf8-on-host-input.txt delete mode 100644 workspace-dependencies-table.txt diff --git a/add-bell-backspace-form-vtab-escapes.txt b/add-bell-backspace-form-vtab-escapes.txt deleted file mode 100644 index b00e84d..0000000 --- a/add-bell-backspace-form-vtab-escapes.txt +++ /dev/null @@ -1,30 +0,0 @@ -Branch: add-bell-backspace-form-vtab-escapes -Tech debt: D70 — missing \a \b \f \v escapes in unescape and parse_bytes_literal. - -================================================================================ -Commit 1: add bell backspace form-feed vertical-tab escapes -================================================================================ -Files: - - compiler/src/modules/parser/types.rs - - documentation/implementation/lexical.md - -What was wrong: - unescape (parser/types.rs:422+) had cases for \n \t \r \\ \' \" \x \u \U - and 1-3 digit octal, but no cases for \a \b \f \v. These are valid Python - escape sequences for ASCII control characters BEL, BS, FF, VT. 
Without a - matching arm they fell through to the "Some(c) => out.push('\\'); out.push(c);" - branch, so "\\a" stayed as the two characters '\' and 'a'. - - parse_bytes_literal had the same gap. - -Fix: - Added \a -> 0x07, \b -> 0x08, \f -> 0x0C, \v -> 0x0B in both unescape (str) - and parse_bytes_literal (bytes), keeping the existing match-arm style. - -Documentation: - documentation/implementation/lexical.md line 78 explicitly enumerates the - recognised escapes; updating it was strictly necessary since omitting the - new escapes would leave the spec incomplete. Added \a \b \f \v to the - recognised-escapes list in the same compact inline format. - -Tests: cargo test -> 6/6 passed. diff --git a/add-edge-pdk-prelude.txt b/add-edge-pdk-prelude.txt deleted file mode 100644 index 5ae8665..0000000 --- a/add-edge-pdk-prelude.txt +++ /dev/null @@ -1,52 +0,0 @@ -Branch: add-edge-pdk-prelude -Tech debt: D56 — `use edge_pdk::*;` glob-imports #[doc(hidden)] symbols. - -================================================================================ -Commit 1: introduce edge_pdk::prelude -================================================================================ -File: edge-pdk/src/lib.rs - -What was wrong: - Plugin authors are expected to write `use edge_pdk::*;` (this is the - pattern in the wasm-abi doc and the slugify-mod example). The glob - brings in everything that's `pub` at the crate root, including the - two macro-internal symbols documented as #[doc(hidden)]: - - pub fn __stash_error(e: Error) // line 74 - pub extern "C" fn __edge_alloc(...) // line 111 - - These symbols exist because `#[plugin_fn]` expansion calls them; they - are not part of the user-facing API. But #[doc(hidden)] only hides - them from rustdoc — it does not exclude them from a glob import. So - every plugin author silently gets `__stash_error` and `__edge_alloc` - in their namespace, and any future rename of those internals is a - breaking change to plugins that don't use them. 
- -Fix: - Added a curated prelude module: - - pub mod prelude { - pub use crate::{plugin_fn, Handle, Value, Error, Result, - FromValue, IntoValue}; - } - - This is the standard Rust prelude pattern (cf. std::prelude). Plugin - authors can switch to `use edge_pdk::prelude::*;` to opt out of the - glob's noise. The legacy `use edge_pdk::*;` path is unchanged and - still works for existing plugins; the prelude is purely additive. - - A /* ... */ doc comment in the lexer/parser style explains the - intent so contributors know where to extend the surface. - - This addresses just the prelude half of D56. The companion fix — - moving __stash_error / __edge_alloc into a `__internals` submodule - so they cannot leak through any glob — is a breaking change for the - proc-macro and is left for a separate branch. - -Documentation: - reference/wasm-abi.md examples use `use edge_pdk::*;` which continues - to work unchanged. Recommending the prelude in those examples is a - pure migration suggestion, not a correctness fix; skipped to keep - the change strictly additive. - -Build & tests: cargo build -p edge-pdk -> ok; cargo test -> 6/6 passed. diff --git a/add-pdk-module-bootstrap-macro.txt b/add-pdk-module-bootstrap-macro.txt deleted file mode 100644 index 0d58039..0000000 --- a/add-pdk-module-bootstrap-macro.txt +++ /dev/null @@ -1,58 +0,0 @@ -Branch: add-pdk-module-bootstrap-macro -Tech debt: D55 — every plugin author re-declares the same allocator + panic_handler boilerplate. - -================================================================================ -Commit 1: add edge_pdk::module! bootstrap macro -================================================================================ -Files: - - edge-pdk/Cargo.toml (wasm32 lol_alloc dep) - - edge-pdk/src/lib.rs (re-export + macro_rules! 
module) - -What was wrong: - Each plugin's lib.rs duplicated four lines of boilerplate: - - extern crate alloc; - #[global_allocator] - static A: lol_alloc::LeakingPageAllocator = lol_alloc::LeakingPageAllocator; - #[panic_handler] - fn panic(_: &core::panic::PanicInfo) -> ! { core::arch::wasm32::unreachable() } - - Two real costs: - - lol_alloc had to be listed in every plugin's Cargo.toml. The - workspace already needed it for the host compiler; making each - plugin add it again is duplication that drifts (different version - pins between plugins is a real failure mode). - - The #[panic_handler] is wasm32-only. New plugin authors who try - to add unit tests find their host build fails because panic_handler - is `duplicate language item` or `cannot apply to host build`. - The cfg-gating that fixes this isn't obvious; the macro hides it. - -Fix: - - Added a wasm32-only `lol_alloc = "0.4"` dependency in - edge-pdk/Cargo.toml under `[target.'cfg(target_arch = "wasm32")'.dependencies]`, - so the host build (e.g. cargo test on the PDK itself) does not pull - it in. - - Re-exported the crate as `__lol_alloc` (under a hidden path) from - edge-pdk/src/lib.rs so the macro can name the symbol regardless of - whether the user added their own `lol_alloc` dep. - - Added a `macro_rules! module` (#[macro_export]) that emits the - #[global_allocator] static and the #[panic_handler] fn under - `#[cfg(target_arch = "wasm32")]`, so on host builds the macro - expands to nothing and `cargo test` works without further changes. - - Plugin authors can now write `edge_pdk::module!();` once at the top - of lib.rs and drop the four boilerplate lines. The pre-existing - manual pattern still works; the macro is strictly additive. - - /* ... */ doc comment in the lexer/parser style records the exact - attributes the macro emits and the still-required crate-root - attributes (#![no_std], #![no_main], extern crate alloc;) that - cannot be injected from inside an item position. 
- -Documentation: - reference/wasm-abi.md and reference/writing-modules.md show the - manual pattern. Both are still valid — the macro is opt-in. No doc - updates are strictly required (recommending the macro path is a - pure migration suggestion, not a correctness fix). - -Build & tests: cargo build -p edge-pdk -> ok; cargo test -> 6/6 passed. diff --git a/add-wasm-free-export.txt b/add-wasm-free-export.txt deleted file mode 100644 index 353cd9e..0000000 --- a/add-wasm-free-export.txt +++ /dev/null @@ -1,40 +0,0 @@ -Branch: add-wasm-free-export -Tech debt: D17 (partial) — wasm_alloc has no symmetric wasm_free, every host alloc leaks. - -================================================================================ -Commit 1: add wasm_free counterpart for wasm_alloc -================================================================================ -File: compiler/src/main/exports.rs - -What was wrong: - wasm_alloc returned a pointer obtained from Box::into_raw(...) of a - boxed slice, but no export let the host hand the pointer back to be - reclaimed. Every host-side staging buffer therefore leaked for the - lifetime of the WASM instance. The WASM runtime had grown to expect - this — the comment in mod.rs about the bump allocator masks the - fact that the underlying allocator itself can free, the host just - had no surface to ask for it. - -Fix: - Added the symmetric export: - - pub unsafe extern "C" fn wasm_free(ptr: *mut u8, size: u32); - - It reconstructs the boxed slice from (ptr, size) and drops it — the - exact inverse of wasm_alloc's `Box::into_raw(boxed_slice)`. Treats - null ptr and size == 0 as no-ops so the host can call it - unconditionally after every alloc. A /* ... */ comment in the - lexer/parser style records the precondition (size must match the - original alloc). - - This does NOT address the broader D2 (deprecate SRC/OUT/INP in - favour of handle-based ABI) — that's structural. 
It just closes - the alloc/free asymmetry so a host that wants to release buffers - can, today, without changing the rest of the wire. - -Documentation: - No .md docs reference wasm_alloc or wasm_free at the wire level (the - WASM ABI doc covers host imports / exports for handles, not these - staging buffers). No doc updates required. - -Build & tests: cargo build -p edge-python -> ok; cargo test -> 6/6 passed. diff --git a/centralize-nanbox-constants.txt b/centralize-nanbox-constants.txt deleted file mode 100644 index 8411b0a..0000000 --- a/centralize-nanbox-constants.txt +++ /dev/null @@ -1,75 +0,0 @@ -Branch: centralize-nanbox-constants -Tech debt: D20 — NaN-boxing constants existed in three independent copies (classify_encode, classify_decode, vm::types::Val), drifting was a silent ABI break. - -================================================================================ -Commit 1: lift NaN-box layout into abi::nan_box -================================================================================ -Files: - - compiler/src/abi.rs - - compiler/src/modules/vm/types/mod.rs - -What was wrong: - Three places defined the same constants: - - 1. classify_encode (abi.rs:188+) — inner consts: QNAN, TAG_NONE_BITS, - TAG_TRUE_BITS, TAG_FALSE_BITS, TAG_INT_BITS. - 2. classify_decode (abi.rs:247+) — inner consts: QNAN, SIGN, TAG_INT. - 3. vm::types::Val (mod.rs:64+) — module-level consts: QNAN, SIGN, - TAG_UNDEF, TAG_NONE, TAG_TRUE, TAG_FALSE, TAG_INT, TAG_HEAP. - - All three carried the same numeric values, but cargo had no way to - prove that. A bump in any one (e.g. moving the heap tag bit nibble) - would compile cleanly and silently corrupt round-tripping at the - wire boundary. The doc-comments openly admitted the duplication - ("must match host Val impl", "Same NaN-boxing constants as - classify_encode") without proposing a remedy. 
- - The 47-bit int payload mask (0x0000_FFFF_FFFF_FFFF) and the 28-bit - heap-index mask (0x0FFF_FFFF) were also embedded inline at the use - sites without symbolic names. - -Fix: - Added a sealed `pub mod nan_box` at the top of abi.rs holding every - NaN-boxing constant in a single source-of-truth: - - pub mod nan_box { - pub const QNAN: u64 = 0x7FFC_0000_0000_0000; - pub const SIGN: u64 = 0x8000_0000_0000_0000; - pub const TAG_UNDEF: u64 = QNAN; - pub const TAG_NONE: u64 = QNAN | 1; - pub const TAG_TRUE: u64 = QNAN | 2; - pub const TAG_FALSE: u64 = QNAN | 3; - pub const TAG_INT: u64 = QNAN | SIGN; - pub const TAG_HEAP: u64 = QNAN | 4; - pub const INT_PAYLOAD_MASK: u64 = 0x0000_FFFF_FFFF_FFFF; - } - - - classify_encode now `use nan_box::*;` and references TAG_NONE / - TAG_TRUE / TAG_FALSE / TAG_INT / INT_PAYLOAD_MASK directly. Two - of the locally-defined names lost their `_BITS` suffix because - the canonical names already convey "bit pattern" through context. - - classify_decode same treatment, plus the magic 0x0000_FFFF_FFFF_FFFF - now reads INT_PAYLOAD_MASK and the QNAN|1/2/3 literals collapse to - TAG_NONE/TAG_TRUE/TAG_FALSE. - - vm::types::Val replaced its eight module-level `const` declarations - with `use crate::abi::nan_box::{...}`. The two inline mask - occurrences inside `Val::int` and `Val::as_int` now name - INT_PAYLOAD_MASK explicitly. - - 28-bit heap index mask (0x0FFF_FFFF) left inline since it appears - once and is structurally bound to the `>> 4` shift in the same - expression. - - /* ... */ doc comment in the lexer/parser style on the new module - records the sealing rule (any layout change is an ABI bump, ties - into D22's version handshake). - - Numeric values are byte-identical to before. Tests prove the - layout still round-trips through the VM and the wire codec. - -Documentation: - README.md and design.md describe the NaN-box layout at the abstract - level (e.g. "Int = QNAN | SIGN | i47"). 
The descriptions remain - accurate; the file-path internals are not user-visible. No doc - updates required. - -Build & tests: cargo build -p edge-python -> ok; cargo test -> 6/6 passed. diff --git a/centralize-src-buffer-read.txt b/centralize-src-buffer-read.txt deleted file mode 100644 index 90c55f5..0000000 --- a/centralize-src-buffer-read.txt +++ /dev/null @@ -1,47 +0,0 @@ -Branch: centralize-src-buffer-read -Tech debt: D17 / D62 — extract_imports and run duplicated SRC reading with divergent error handling. - -================================================================================ -Commit 1: extract read_src helper for SRC buffer -================================================================================ -File: compiler/src/main/exports.rs - -What was wrong: - Both extern "C" entry points opened the host-owned SRC buffer with the - same five-line preamble: - - let len = len.min(SZ); - let src = match core::str::from_utf8(unsafe { - core::slice::from_raw_parts(core::ptr::addr_of!(SRC) as *const u8, len) - }) { - Ok(s) => s, - Err(_) => ... - }; - - The cap-and-validate logic was identical, but the error paths - diverged: extract_imports silently returned write_out(""), while run - formatted "input rejected: invalid utf-8 at byte N". Adding a third - caller would have spawned a third dialect of the same boundary check. - -Fix: - Introduced a single private helper: - - unsafe fn read_src(len: usize) -> Result<&'static str, Utf8Error> - - that performs the cap-and-validate once and returns the &'static str - view (SRC is `static`, so the slice's lifetime is genuinely 'static). - - Both callers now match on the Result and decide how to surface the - failure: extract_imports keeps its silent-empty contract, run keeps - its user-facing error. The behaviour is byte-identical to before; - only the duplication is gone. - - A new caller now has one place to look and one decision to make - ("silent or formatted error?"), not five duplicated lines to copy. 
- -Documentation: - No .md docs describe the boundary handling at this granularity (the - mentions of `run()` in design.md / language docs refer to Python's - builtin `run()`, not the WASM export). No doc updates required. - -Build & tests: cargo build -p edge-python -> ok; cargo test -> 6/6 passed. diff --git a/collapse-push-macro-into-s.txt b/collapse-push-macro-into-s.txt deleted file mode 100644 index cea8a84..0000000 --- a/collapse-push-macro-into-s.txt +++ /dev/null @@ -1,35 +0,0 @@ -Branch: collapse-push-macro-into-s -Tech debt: D43 — redundant push! macro alongside s!. - -================================================================================ -Commit 1: drop unused push macro -================================================================================ -Files: - - compiler/src/modules/fstr.rs - - documentation/implementation/design.md - -What was wrong: - fstr.rs exported two #[macro_export] macros that covered the same - domain (no-alloc string formatting): - - push!(s, ...) — single fragment, six explicit type arms. - - s!(...) — multiple fragments via a recursive @b helper, same - type matrix. - Any new fragment kind (e.g. hex, dec_groups) had to be added in two - places, with mechanical drift risk. push! also lacked s!'s `cap:` form - and could not be chained. - - Workspace-wide grep for `push!` returned zero call sites outside the - declaration itself. The macro was simply dead surface. - -Fix: - Removed the entire push! definition. s! remains the single string - builder. No call sites had to change. - -Documentation: - documentation/implementation/design.md line 92 listed - `# numeric formatter + s!/push!/err! string macros` for fstr.rs. - Updated to drop `push!/` so the inventory matches the file. - - compiler/README.md only lists the file path; no macro names to drift. - -Tests: cargo test -> 6/6 passed. 
diff --git a/deterministic-fx-hasher-seed.txt b/deterministic-fx-hasher-seed.txt deleted file mode 100644 index 7c4f77e..0000000 --- a/deterministic-fx-hasher-seed.txt +++ /dev/null @@ -1,37 +0,0 @@ -Branch: deterministic-fx-hasher-seed -Tech debt: D48 — non-deterministic FxBuildHasher seed. - -================================================================================ -Commit 1: use fixed seed for FxBuildHasher -================================================================================ -Files: - - compiler/src/modules/fx.rs - - documentation/implementation/design.md - -What was wrong: - fx.rs used a global atomic counter (SEED_COUNTER) plus the MurmurHash3 - finalizer to assign each FxBuildHasher a per-instance seed. In a host- - driven WASM compiler that is single-threaded and runs only trusted input, - the per-instance randomisation buys nothing — there is no DoS attacker - whose collisions need to be foiled. What it does cost: - - - Map iteration order varies across runs of the same input. Diagnostic - output that lists names from a FxHashMap (e.g. "X imports never used: - [...]") differs between runs. - - Bytecode artifacts that serialise any hash-ordered table cannot be - compared byte-for-byte across builds. - - Golden tests that assert exact stdout become flaky. - -Fix: - Replaced the atomic counter and murmur3 finaliser with a single fixed - constant FIXED_SEED (0x9e3779b97f4a7c15, the golden ratio in u64 — a - conventional choice for a non-zero, non-trivial seed). FxBuildHasher::new() - is now Self(FIXED_SEED). Removed the unused AtomicUsize, Ordering import, - SEED_COUNTER static, and murmur3_fmix64 helper. - -Documentation: - documentation/implementation/design.md line 93 listed fx.rs as - "per-map seeded FxBuildHasher", which directly contradicts the fix. - Updated to "fixed-seed FxBuildHasher (deterministic)". - -Tests: cargo test -> 6/6 passed. 
diff --git a/distinguish-overflow-from-type-miss.txt b/distinguish-overflow-from-type-miss.txt deleted file mode 100644 index 354a294..0000000 --- a/distinguish-overflow-from-type-miss.txt +++ /dev/null @@ -1,62 +0,0 @@ -Branch: distinguish-overflow-from-type-miss -Tech debt: D29 — IC fast path treated arithmetic overflow as a type-guard miss and threw away the speculation. - -================================================================================ -Commit 1: split fast-path outcomes into Done/TypeMiss/Overflow -================================================================================ -File: compiler/src/modules/vm/dispatch.rs - -What was wrong: - exec_fast returned Ok(true) on success and Ok(false) on every other - fall-back path: type guard miss, integer overflow (AddInt/SubInt/ - MulInt), and division by zero (ModInt/FloorDivInt). The dispatcher - treated Ok(false) uniformly: - - if self.exec_fast(fast)? { return Ok(None); } - cache.invalidate(rip); // <-- always - - This means: the very first time a hot AddInt loop sees an overflow, - the IC slot is wiped. The slow handler raises the right - OverflowError, but for the next QUICK_THRESH = 4 iterations of the - loop the dispatch goes through the slow path until the IC re-warms. - In a numeric loop where ~99.99% of iterations are inline-int and one - is overflow, every overflow event costs four extra slow-path - dispatches plus the recompute of the type key. The cache thrashed - for the wrong reason — types had not changed, only one input pair - exceeded the 47-bit Val range. - -Fix: - Introduced a 3-way enum in the dispatch module: - enum FastOutcome { Done, TypeMiss, Overflow } - - exec_fast now returns a Result whose Ok value is a FastOutcome: - Done — handled inline, no further work. - TypeMiss — operands didn't match the speculation (heap-not-Str, - not-int, not-float, fewer than 2 elements on stack). - Overflow — types matched but the result is unrepresentable - (i64 overflow, mod/floordiv with b==0).
The slow - handler will raise OverflowError or ZeroDivisionError; - the IC stays warm because the speculation was correct. - - The dispatcher's call site explicitly destructures: - - match self.exec_fast(fast)? { - FastOutcome::Done => return Ok(None), - FastOutcome::Overflow => {} // fall through, keep IC - FastOutcome::TypeMiss => cache.invalidate(rip), - } - - Net effect: a hot int-arith loop that hits an occasional overflow - no longer thrashes the IC; the slow handler raises the same exception - it always did, then the next iteration is back on the fast path. - - A /* ... */ doc comment in the lexer/parser style records the new - semantics on exec_fast and on the dispatcher. - -Documentation: - README.md, design.md, what-it-is.md describe the IC at the level of - "type-guard miss invalidates and deopts". That statement is still - exactly true — only TypeMiss invalidates. The new Overflow outcome - is a refinement the docs don't speak to. No doc updates required. - -Build & tests: cargo build -p edge-python -> ok; cargo test -> 6/6 passed. diff --git a/export-pdk-abi-version.txt b/export-pdk-abi-version.txt deleted file mode 100644 index f7f14a3..0000000 --- a/export-pdk-abi-version.txt +++ /dev/null @@ -1,63 +0,0 @@ -Branch: export-pdk-abi-version -Tech debt: D22 — PDK had no version handshake; an evolved host could load a v1 plugin and decode garbage silently. - -================================================================================ -Commit 1: export __edge_abi_version from edge-pdk -================================================================================ -Files: - - edge-pdk/src/lib.rs - - documentation/reference/wasm-abi.md - -What was wrong: - The PDK's module-doc said it targeted "v1 wasm-abi" (line 4 of - src/lib.rs), but no symbol carried that version into the linked - artifact. Concretely: - - - The `op` and `tag` modules listed const codes a plugin had to match - against host expectations. 
- - The host-side bridge.rs defined the same codes (D21 — already - flagged as needing centralisation). - - Nothing told a host loader which version of the wire format the - .wasm in front of it was built against. - - Result: if the host evolved (e.g. added op = 8 with a new layout), - it would load an old plugin that still implemented op = 0..7. Most - call paths would seem to work; the new op would silently produce a - different effect (or crash with no actionable diagnostic) the first - time it landed. - -Fix: - Added `pub const EDGE_ABI_VERSION: u32 = 1;` to edge-pdk/src/lib.rs - and a no_mangle wasm export: - - #[unsafe(no_mangle)] - pub extern "C" fn __edge_abi_version() -> u32 { EDGE_ABI_VERSION } - - Every plugin built against this PDK now carries a version stamp the - host can read at instantiation. This is purely additive on the - guest side; existing hosts that don't read the symbol behave - unchanged. - - /* ... */ doc comment in the lexer/parser style records why bumps - are required (any breaking change to op/tag/codec/error kinds). - -Documentation: - reference/wasm-abi.md required two changes (strict necessity — the - doc declares the public contract): - - 1. The "Required guest export" section (singular) became "Required - guest exports" (plural) with both `__edge_alloc` and - `__edge_abi_version` listed in the rust signature block, plus a - paragraph explaining the host's MUST-check obligation. - 2. The PDK summary near the bottom now says "the required - __edge_alloc and __edge_abi_version exports emitted automatically". - - No JS-side host loader code is in this repo, so the loader-side - validation is documented as a contract for downstream hosts to - honour. The companion edge-abi crate (D21) and host-side reading - of this symbol from compiler.wasm remain separate work. - -Build & tests: - cargo build -p edge-pdk -> ok. - cargo build -p slugify-mod --target wasm32-unknown-unknown --release -> ok. - cargo test -> 6/6 passed. 
diff --git a/fix-host-edge-encode-doc.txt b/fix-host-edge-encode-doc.txt deleted file mode 100644 index 4c28cef..0000000 --- a/fix-host-edge-encode-doc.txt +++ /dev/null @@ -1,32 +0,0 @@ -Branch: fix-host-edge-encode-doc -Tech debt: D59 — duplicated copy-paste doc-comment on host_edge_encode. - -================================================================================ -Commit 1: correct host_edge_encode doc-comment -================================================================================ -File: compiler/src/main/abi_bridge.rs - -What was wrong: - Lines 214 and 236 both carried the same doc-comment: - "// Bootstrap decoder: writes tag to `*out_tag`, bytes to `dst[..dst_max]`." - - The comment is correct above host_edge_decode (line 236) but a copy-paste - on host_edge_encode (line 214). encode does not write to *out_tag and - does not own a dst buffer — it takes (tag, ptr, len), classifies them - into a Val, and returns the resulting handle (0 on Invalid). The - implementation made this clear; only the comment misled. - -Fix: - Replaced the comment on encode with: - "// Bootstrap encoder: classifies (tag, bytes) into a Val handle; - returns 0 on Invalid." - - This matches the actual signature - fn host_edge_encode(tag: u32, ptr: *const u8, len: u32) -> u32 - and the body's behaviour (Direct/AllocStr -> put_val, Invalid -> 0). - -Documentation: - reference/wasm-abi.md mentions host_edge_encode by name only (line 328); - no description there to drift. No doc changes required. - -Build: cargo build -p edge-python -> ok. diff --git a/fix-plugin-fn-macro-error-message.txt b/fix-plugin-fn-macro-error-message.txt deleted file mode 100644 index f3253eb..0000000 --- a/fix-plugin-fn-macro-error-message.txt +++ /dev/null @@ -1,27 +0,0 @@ -Branch: fix-plugin-fn-macro-error-message -Tech debt: D6 — compile_error message cites a non-existent attribute name. 
- -================================================================================ -Commit 1: fix plugin_fn macro error message attribute name -================================================================================ -File: edge-pdk/macros/src/lib.rs - -What was wrong: - Line 54: compile_error!("#[edge] does not support methods (`self` parameter)"); - - The exported attribute is `#[plugin_fn]`, not `#[edge]`. The literal looks - like a copy-paste from a previous renaming pass. A plugin author who tried - to write a method-style fn would see an error pointing at `#[edge]` — a - symbol that does not exist in this crate or in the docs, so they cannot - grep their code for it. - -Fix: - Changed the literal to "#[plugin_fn] does not support methods (`self` - parameter)". - - Cargo.toml description in the same crate also misnames the attribute, but - that is part of the larger crate rename (D5: `macros` -> `edge-pdk-macros`) - and is left for that branch. All .md documentation already uses - `#[plugin_fn]` consistently; no doc changes were needed. - -Build: cargo build -p macros -> ok. diff --git a/fix-raw-string-prefix-detection.txt b/fix-raw-string-prefix-detection.txt deleted file mode 100644 index 531b8e5..0000000 --- a/fix-raw-string-prefix-detection.txt +++ /dev/null @@ -1,32 +0,0 @@ -Branch: fix-raw-string-prefix-detection -Tech debt: D1 — is_raw scans entire lexeme instead of prefix only. - -================================================================================ -Commit 1: fix raw string prefix detection -================================================================================ -File: compiler/src/modules/parser/types.rs - -What was wrong: - parse_string (line 360): `let is_raw = s.contains('r') || s.contains('R');` - parse_bytes_literal (line 373): same pattern. - - s is the full lexeme (prefix + quotes + body). `contains('r')` therefore - scans the body too. 
Any non-raw u-string or b-string whose body contains - the letter 'r' or 'R' was silently treated as raw, so escapes like \n, \t, - \xHH were preserved verbatim. - - Concrete example: u"hello\n" -> body contains no 'r', escape works. - But u"forward\n" -> body contains 'r' inside "forward", is_raw=true, - the literal kept the backslash-n unescaped. - - Documentation in implementation/lexical.md already states that the prefix - is recognised before the opening quote, so docs were already correct; only - the implementation diverged. - -Fix: - Introduced has_raw_prefix(s) which reads bytes only up to the first quote - byte (`"` or `'`) and checks for r/R there. parse_string and - parse_bytes_literal now call has_raw_prefix, so detection matches the - language spec and the documentation. - -Tests: cargo test -> 6/6 passed. diff --git a/gate-templates-on-immutable-args.txt b/gate-templates-on-immutable-args.txt deleted file mode 100644 index 810b822..0000000 --- a/gate-templates-on-immutable-args.txt +++ /dev/null @@ -1,44 +0,0 @@ -Branch: gate-templates-on-immutable-args -Tech debt: D7 — Templates::record cached mutable args, returning stale results when the caller mutated the same container between calls. - -================================================================================ -Commit 1: skip template memoization when any arg is mutable -================================================================================ -Files: - - compiler/src/modules/vm/cache.rs - - documentation/implementation/design.md - -What was wrong: - hash_args (line 147) folded raw Val.0 bits — so two calls passing - the SAME list handle hashed identically, even if the list contents - changed between calls. args_match used eq_vals_with_heap which DID - walk contents, but only after the hash matched and only at the - point of `record()`/`lookup()`.
The hit counter incremented on - identical-bits args; once hits >= TPL_THRESH, lookup returned the - cached `result` for the OLD contents. - - Concrete failure: a pure function f(xs) called as f(items), - items.append(...), f(items) would return f's prior value on the - second call if the implementation had cached. The eq verification - catches the mismatch only on certain inserts; under hash collisions - with byte-identical handles it does not. - -Fix: - Added `args_memoizable(args, heap)` which scans every arg and - returns false if any is HeapObj::List, Dict, Set, or Instance — - the four mutable containers in the heap. record() bails out early - on `!args_memoizable(...)`, so mutable-arg calls simply never enter - the cache. Immutable args (Int, Float, Bool, None, Str, Bytes, - Tuple, FrozenSet, Range, Slice, Func, Class, etc.) keep their - fast path unchanged. - - No change to lookup(): an entry never recorded cannot be looked up, - so the gate at write-time is sufficient. - -Documentation: - documentation/implementation/design.md line 20 enumerates the - memoisation gates. Added the new gate ("every argument being - byte-stable, mutable containers disqualify") to the inline list so - the spec matches the implementation. - -Tests: cargo test -> 6/6 passed. diff --git a/generalize-int-overflow-message.txt b/generalize-int-overflow-message.txt deleted file mode 100644 index f6cf258..0000000 --- a/generalize-int-overflow-message.txt +++ /dev/null @@ -1,44 +0,0 @@ -Branch: generalize-int-overflow-message -Tech debt: D63 — parse-time int overflow message exposes Val internals. 
- -================================================================================ -Commit 1: generalize integer literal overflow message -================================================================================ -File: compiler/src/modules/parser/expr.rs - -What was wrong: - parse_number (line 283) emitted: - - "integer literal too large for 47-bit Val (max \u{00b1}140737488355327)" - - Two issues: - - 1. The error fires on `digits.parse::().ok()` failure. i64::MAX - is ~9.2e18, far above the 47-bit cap (~1.4e14). So a literal in - the 1.4e14..9.2e18 window parses fine here and only fails LATER - at NaN-box materialisation in cache.rs / optimizer.rs as - OverflowError. The "47-bit" attribution at this site is therefore - factually wrong — the limit being violated at parse time is the - i64 ceiling, not the 47-bit Val ceiling. - 2. Even when the upper bound were correct, "47-bit Val" leaks the - internal NaN-boxing detail to a parse-time user-facing error. - -Fix: - Replaced the message with the simpler, accurate phrasing: - - "integer literal too large to represent" - - The 47-bit fact remains documented in: - - documentation/implementation/design.md (architecture) - - compiler/README.md (memory model + Val table) - - documentation/getting-started/what-it-is.md (numbers) - These are correct: the 47-bit cap is enforced at materialisation as - OverflowError, exactly as the docs say. Only the parse-time message - was misattributing the failure. - -Documentation: - No doc changes required. The 47-bit invariant docs describe the VM - side, which is unchanged. The parse-time message is not surfaced in - any doc. - -Tests: cargo test -> 6/6 passed. diff --git a/handle-len-via-raii.txt b/handle-len-via-raii.txt deleted file mode 100644 index e49514c..0000000 --- a/handle-len-via-raii.txt +++ /dev/null @@ -1,46 +0,0 @@ -Branch: handle-len-via-raii -Tech debt: D57 — Handle::len() decodes then manually edge_releases, leaking on any future ? between the two calls. 
- -================================================================================ -Commit 1: route handle release through Drop in Handle::len -================================================================================ -File: edge-pdk/src/lib.rs - -What was wrong: - Handle::len had this pattern at lines 459-461: - - let v = i64::from_handle(out)?; - unsafe { edge_release(out); } - Ok(v) - - This is correct today only because i64::from_handle currently cannot - short-circuit between `out` and the manual release. But: - - 1. The rest of the impl block uses Handle::from_raw(out) which lets - Drop run edge_release automatically. len() is the only outlier. - 2. If a maintainer adds another `?`-returning step between decode and - release — e.g. validating range, attaching a span — the handle - leaks silently. - 3. The `unsafe` block at the call site exposes raw FFI in API code - that has a perfectly good RAII alternative one struct away. - -Fix: - Replaced the manual decode+release with: - - let h = Handle::from_raw(out); - i64::from_handle(h.raw()) - - Now Drop owns the release on every exit path (success, ?-propagation, - panic). No `unsafe`. The behaviour is identical for all current - callers. - - A short /* ... */ comment in the lexer/parser style records *why* the - rewrite — a future ? would have leaked. - -Documentation: - reference/wasm-abi.md describes edge_release at the ABI level (owned - vs. borrowed handles, manual release rules in raw FFI examples). It - does not describe the high-level Handle wrapper's RAII contract, so - no doc changes are required. - -Build & tests: cargo build -p edge-pdk -> ok; cargo test -> 6/6 passed. 
diff --git a/hide-pdk-internals-submodule.txt b/hide-pdk-internals-submodule.txt deleted file mode 100644 index 9dc53d5..0000000 --- a/hide-pdk-internals-submodule.txt +++ /dev/null @@ -1,62 +0,0 @@ -Branch: hide-pdk-internals-submodule -Tech debt: D56 (full) — finishes the prelude work by removing __stash_error / __edge_alloc from the crate root, where `use edge_pdk::*;` could still glob them. - -================================================================================ -Commit 1: relocate pdk macro contract surface to __internals -================================================================================ -Files: - - edge-pdk/src/lib.rs - - edge-pdk/macros/src/lib.rs - -What was wrong: - After the prelude branch, plugin authors had a clean opt-in via - `use edge_pdk::prelude::*;`, but the legacy `use edge_pdk::*;` in - the wasm-abi doc and existing plugins still glob-imported every - crate-root pub item — including the two #[doc(hidden)] symbols that - exist purely as the proc-macro's contract surface: - - pub fn __stash_error(e: Error) - - pub extern "C" fn __edge_alloc(size: u32) -> *mut u8 - - #[doc(hidden)] only hides them from rustdoc; it does not exclude them - from a glob. So renaming or refactoring those internals remained a - silent breaking change for any plugin that touched them - (intentionally or not) through the glob. - -Fix: - Created a #[doc(hidden)] sub-module: - - pub mod __internals { - pub fn stash_error(e: Error) { ... } - #[unsafe(no_mangle)] - pub extern "C" fn __edge_alloc(...) -> *mut u8 { ... } - } - - `use edge_pdk::*;` does NOT glob-import items from sub-modules, so - these symbols are now invisible to plugin authors who use the glob. - Renamed `__stash_error` to `stash_error` inside the sub-module — - the leading double-underscore was a "do not touch" warning that - becomes redundant once the symbol lives in `__internals`. - - __edge_alloc keeps its name and #[no_mangle] so the host shim's - WASM call site is unchanged. 
The Rust path moved, but the WASM - export symbol is identical. - - Updated the macro expansion in edge-pdk/macros/src/lib.rs from - `::edge_pdk::__stash_error(...)` to - `::edge_pdk::__internals::stash_error(...)` (4 sites). Removed the - now-unused `ToString` import from the lib.rs root (it lives inside - __internals). - - Verified the slugify-mod example still builds cleanly against the - wasm32-unknown-unknown target via the macro. - -Documentation: - reference/wasm-abi.md mentions `__edge_alloc` as the WASM export - name only; the symbol name is unchanged so the doc stays correct. - Updated the lib.rs module-doc bullet that listed __edge_alloc to - note where it now lives in source. - -Build & tests: - cargo build -p edge-pdk -p macros -> ok (no warnings). - cargo build -p slugify-mod --target wasm32-unknown-unknown --release -> ok. - cargo test -> 6/6 passed. diff --git a/mark-val-from-raw-unsafe.txt b/mark-val-from-raw-unsafe.txt deleted file mode 100644 index c01277b..0000000 --- a/mark-val-from-raw-unsafe.txt +++ /dev/null @@ -1,46 +0,0 @@ -Branch: mark-val-from-raw-unsafe -Tech debt: D11 (partial) — Val::from_raw was a safe public constructor for an unsafe contract. - -================================================================================ -Commit 1: mark Val::from_raw unsafe and document GC contract -================================================================================ -File: compiler/src/modules/vm/types/mod.rs - -What was wrong: - Line 142 declared: - - pub fn from_raw(u: u64) -> Self { Self(u) } - - This is the public wire-format constructor for a Val. It accepted ANY - u64, including values that decode to Heap with an arbitrary 28-bit - index. HeapPool::get and HeapPool::get_mut then index into - `slots[v.as_heap() as usize]` and call `.expect(...)` if the slot is - free. The combined effect: an external caller passing a u64 they made - up panics the host process. 
- - This is the second half of the broader D5 problem (HeapPool::get - panics on dangling references). The structural fix for D5 — return - Result instead of panicking — is a much larger refactor across every - call site. Marking the constructor unsafe at least pushes the - responsibility onto the wire-format caller, where it belongs, and - matches the documented intent that this Val came from `raw()` on the - same HeapPool. - -Fix: - Changed the signature to: - - pub unsafe fn from_raw(u: u64) -> Self { Self(u) } - - Added a /* # Safety */ doc block describing the invariant: the u64 - must have come from Val::raw() on a Val whose heap slot is still - live in this HeapPool. No internal Rust call sites use Val::from_raw, - so the workspace builds and tests cleanly without further changes. - - External consumers (FFI / WASM loaders / future SDKs) now have to - wrap the call in `unsafe { ... }`, which is the correct signal — - they were already taking on this responsibility implicitly. - -Documentation: - No .md docs reference Val::from_raw directly. - -Tests: cargo test -> 6/6 passed. diff --git a/move-utils-to-util-mod.txt b/move-utils-to-util-mod.txt deleted file mode 100644 index c283c06..0000000 --- a/move-utils-to-util-mod.txt +++ /dev/null @@ -1,53 +0,0 @@ -Branch: move-utils-to-util-mod -Tech debt: D58 — fx, fstr, sha256 sat under modules/, conflating util helpers with the runtime/stdlib namespace. - -================================================================================ -Commit 1: move fx fstr sha256 from modules/ to util/ -================================================================================ -Files: - - compiler/src/lib.rs (split `modules { ... }` to add a `util { ... 
}` peer) - - compiler/src/util/fx.rs (renamed from modules/fx.rs) - - compiler/src/util/fstr.rs (renamed from modules/fstr.rs) - - compiler/src/util/sha256.rs (renamed from modules/sha256.rs) - - 21 .rs files: bulk-rewritten use paths - - compiler/README.md and documentation/implementation/design.md tree - diagrams updated to reflect the new layout. - -What was wrong: - `compiler/src/modules/` semantically should mean "runtime modules of - the Edge Python language" — components newcomers would expect to see - the lexer/parser/vm/packages/builtins inside. Instead it also held: - - fstr.rs - numeric formatter + s!/err! string macros (util) - fx.rs - FxHasher + FxBuildHasher (util) - sha256.rs - in-tree SHA-256 used by integrity checks (util) - - These have nothing to do with the Edge Python language; they are - internal compiler helpers. Mixing them under the same parent forced - newcomers to read each file to discover which were stdlib-shaped and - which were util-shaped. It would also block adding real runtime - modules (`math`, `os`, `sys` etc.) without a second naming round. - -Fix: - - Created `compiler/src/util/` as a peer to `modules/`. - - `git mv` for the three files; git tracks them as renames so blame - history is preserved. - - Updated `compiler/src/lib.rs`: - pub mod util { pub mod fx; pub mod fstr; pub mod sha256; } - pub mod modules { pub mod lexer; pub mod vm; pub mod parser; pub mod packages; } - - Bulk substitution across 21 .rs files (sed -i, no command - substitution): `crate::modules::{fx,fstr,sha256}::` → `crate::util::{...}::` - plus the `compiler_lib::modules::fx::` reference in tests/common.rs. - - The s!/err! macros' internal expansion path (`$crate::modules::fstr::format_f64`) - was caught by the same bulk substitution and now reads - `$crate::util::fstr::format_f64`, so call sites need no change. - - Added a /* ... */ doc comment in lib.rs explaining why util/ is - separated from modules/. 
- -Documentation: - Both the compiler/README.md and documentation/implementation/design.md - tree diagrams listed fstr.rs/fx.rs/sha256.rs under `modules/`. Updated - both to show them under the new `util/` peer. The line-count and - per-file annotations were preserved verbatim. - -Build & tests: cargo build -p edge-python -> ok; cargo test -> 6/6 passed. diff --git a/preallocate-gc-worklist.txt b/preallocate-gc-worklist.txt deleted file mode 100644 index 86de548..0000000 --- a/preallocate-gc-worklist.txt +++ /dev/null @@ -1,43 +0,0 @@ -Branch: preallocate-gc-worklist -Tech debt: D33 — HeapPool::mark allocates a Vec on every call. - -================================================================================ -Commit 1: reuse the GC mark worklist across roots -================================================================================ -File: compiler/src/modules/vm/types/mod.rs - -What was wrong: - HeapPool::mark started with `let mut worklist = vec![v.as_heap()];` - on every invocation. collect() (gc.rs) calls mark() once per root — - stack, with-stack, yields, event queue, current slots, live slots, - every slot template, every globals entry, every iter frame, every - opcode-cache constant pool, every active const pool, plus - templates.mark_all. For a busy program that's hundreds of - allocations every GC cycle. - - Worse: the GC is running because the heap is under pressure. The - allocator we go to for the worklist is the same allocator we're - trying to relieve. On low-memory hosts (the WASM single-threaded - bump allocator) this can OOM the host while we're trying to free - memory. - -Fix: - Added a `mark_worklist: Vec` field to HeapPool, initialised in - HeapPool::new with a 64-slot capacity. mark() now reuses this Vec — - push/pop drain it cleanly back to length 0 (the loop exits when pop - returns None), so the next mark() inherits whatever capacity the - worst case grew to. 
The vector's allocation lives for the entire - HeapPool lifetime, not per-root. - - The closure inside mark() needed `&mut mark_worklist` while the loop - body reads `slots`, so I split the &mut self via destructure - let HeapPool { slots, mark_worklist, .. } = self; - to give the closure independent field-level borrows. - - Removed the now-unused `vec` macro import. - -Documentation: - GC docs (compiler/README.md, design.md) describe the algorithm and - roots, never the worklist representation. No doc updates required. - -Build & tests: cargo build -p edge-python -> ok (no warnings); cargo test -> 6/6 passed. diff --git a/remove-cosmetic-package-aliases.txt b/remove-cosmetic-package-aliases.txt deleted file mode 100644 index e4fa179..0000000 --- a/remove-cosmetic-package-aliases.txt +++ /dev/null @@ -1,28 +0,0 @@ -Branch: remove-cosmetic-package-aliases -Tech debt: D45 — three pub-use aliases unused anywhere in the workspace. - -================================================================================ -Commit 1: drop unused package re-export aliases -================================================================================ -File: compiler/src/modules/packages/mod.rs - -What was wrong: - Lines 91-94 re-exported three names with no callers: - pub use NativeBinding as Binding; - pub use Resolved as ResolvedModule; - pub use NoopResolver as Default_; - - A workspace-wide grep for `\bBinding\b|\bResolvedModule\b|\bDefault_\b` - returns only the three declarations themselves. The .md docs and rust - sources all use the canonical names `NativeBinding`, `Resolved`, and - `NoopResolver`. - - `Default_` with the trailing underscore is also a Rust-convention - smell — readers expect `Default` to be the trait, not a type alias. - -Fix: - Removed the three aliases and their wrapping comment. Canonical names - remain the single source of truth. - -Tests: cargo test -> 6/6 passed. -Docs: none reference the aliases; no changes required. 
diff --git a/remove-ffi-boundary-panics.txt b/remove-ffi-boundary-panics.txt deleted file mode 100644 index 178a09b..0000000 --- a/remove-ffi-boundary-panics.txt +++ /dev/null @@ -1,47 +0,0 @@ -Branch: remove-ffi-boundary-panics -Tech debt: D26 (companion to stash-panic-before-trap) — expect/unwrap calls in abi_bridge.rs let panics cross the WASM FFI boundary as opaque traps. - -================================================================================ -Commit 1: replace expect/unwrap on FFI dispatch paths with propagated VmErr -================================================================================ -File: compiler/src/main/abi_bridge.rs - -What was wrong: - Three sites violated the "panics never cross FFI" rule: - - - host_edge_take_error (line 305): - let (_, msg) = stash.take().expect("peek returned Some"); - Single-threaded WASM makes this unreachable today, but the - `expect` is a foot-gun: any future scheduler change that lets - another path `take()` between the peek and the take above would - panic and trap the module without a message. - - - dispatch_call (line 50): `Ok(vm.stack.pop().unwrap())` - - dispatch_get_item (line 127): `Ok(vm.stack.pop().unwrap())` - Both protected by an `if vm.stack.len() != stack_before + 1` - invariant, but the `unwrap` is a second invariant — if the check - above is ever weakened (e.g. someone bumps the +1 to +N), the - unwrap goes from "redundant" to "panic" silently. - -Fix: - - host_edge_take_error: replaced `expect` with a `let-else` - pattern that returns -1 (no pending error) on the unreachable - branch. Documented in a /* ... */ comment why the prior `.expect` - was a latent FFI hazard. - - dispatch_call / dispatch_get_item: replaced `.unwrap()` with - `.ok_or(VmErr::Runtime("...stack drained mid-dispatch"))`. - Behaviour for the happy path is byte-identical; the previously- - unreachable branch now propagates a typed error instead of - panicking the host process. 
- - Combined with stash-panic-before-trap (already merged, captures - panics that DO occur), the FFI boundary now never traps with a - bare unreachable except from genuinely unrecoverable Rust panics - outside our reach (e.g. allocator OOM during the format). - -Documentation: - No .md docs reference the old expect/unwrap behaviour. wasm-abi.md - describes the public ABI contract, which is unchanged: the same - status codes carry the same meanings. - -Build & tests: cargo build -p edge-python -> ok; cargo test -> 6/6 passed. diff --git a/remove-unreachable-dispatch-arms.txt b/remove-unreachable-dispatch-arms.txt deleted file mode 100644 index 9bca9d8..0000000 --- a/remove-unreachable-dispatch-arms.txt +++ /dev/null @@ -1,41 +0,0 @@ -Branch: remove-unreachable-dispatch-arms -Tech debt: D42 — dead MakeClass/StoreAttr arm in dispatch_generic. - -================================================================================ -Commit 1: drop unreachable dispatch_generic arm -================================================================================ -File: compiler/src/modules/vm/dispatch.rs - -What was wrong: - dispatch_generic (line 576) is the fallback called from the main - dispatch via: - other => self.dispatch_generic(other, op, slots)? - It only ever sees opcodes the main match did NOT handle. - - The main match has explicit arms for OpCode::MakeClass at line 483 - and OpCode::StoreAttr at line 515, so neither variant can reach - dispatch_generic. Yet dispatch_generic carried: - - OpCode::MakeClass | OpCode::StoreAttr => - return Err(cold_runtime("MakeClass/StoreAttr must be in main dispatch")); - - Defensive in intent, dead in practice. Worse, it gives a false sense - of exhaustiveness: the catch-all `_` immediately after means the - presence of explicit arms here doesn't add real coverage. - -Fix: - Removed the unreachable arm. The catch-all - _ => return Err(cold_runtime("unexpected opcode in generic dispatch")) - remains as the runtime safety net. 
- - The follow-up suggestion to replace `_` with an explicit list of - variants legitimately reaching dispatch_generic — so adding a new - OpCode breaks the build instead of being silently dropped to runtime - error — is structural and is left for a separate refactor. - -Documentation: - README and design.md mention MakeClass/StoreAttr only at the language - level (decorators, impurity tracking), not at the dispatch level. No - doc changes required. - -Tests: cargo test -> 6/6 passed. diff --git a/rename-macros-to-edge-pdk-macros.txt b/rename-macros-to-edge-pdk-macros.txt deleted file mode 100644 index 8014448..0000000 --- a/rename-macros-to-edge-pdk-macros.txt +++ /dev/null @@ -1,50 +0,0 @@ -Branch: rename-macros-to-edge-pdk-macros -Tech debt: D52 — proc-macro crate named `macros` is generic and not crates.io publishable. - -================================================================================ -Commit 1: rename macros crate to edge-pdk-macros -================================================================================ -Files: - - edge-pdk/macros/Cargo.toml (package name + description) - - edge-pdk/Cargo.toml (dependency rename) - - edge-pdk/src/lib.rs (re-export rename + module-doc + #[plugin_fn] attribute name) - - Cargo.lock (regenerated by cargo) - -What was wrong: - The proc-macro crate was published as `name = "macros"`. Three problems: - - 1. Unpublishable to crates.io: the name "macros" is taken and even if - it weren't, no curator would approve the name as descriptive of a - specific crate. - 2. `cargo tree` shows `macros 0.1.0` with no context — readers cannot - tell which parent crate it belongs to. - 3. The convention for proc-macro sibling crates is `<parent>-macros` - (serde_derive, tokio-macros, clap_derive) — drift from convention - forces every new contributor to learn the local exception.
- - The Cargo.toml description also referenced "the `edge` crate" and the - "`#[edge]` attribute" — both wrong: the parent is `edge-pdk` and the - exported attribute is `#[plugin_fn]` (edge-pdk/src/lib.rs:38). The - description doc-comment in src/lib.rs:72 had the same `#[edge]` typo - on `__stash_error`. - -Fix: - - edge-pdk/macros/Cargo.toml: `name = "edge-pdk-macros"`. Description - rewritten to reference `edge-pdk` and `#[plugin_fn]` instead of the - fictional `edge` / `#[edge]`. - - edge-pdk/Cargo.toml: `edge-pdk-macros = { path = "macros", version = "0.1.0" }`. - - edge-pdk/src/lib.rs: `pub use edge_pdk_macros::plugin_fn;` and a - module-doc fix and the doc-comment on `__stash_error` updated to - `#[plugin_fn]`. - - Cargo.lock: cargo regenerated the entry; committed. - - Workspace member paths (`edge-pdk/macros`) did NOT need updating — - workspace members are addressed by directory, not crate name. - -Documentation: - documentation/implementation/design.md mentions "string macros" but - refers to the s!/push!/err! macros, not the crate name. No doc - updates required for the rename. Examples directory contains no - references to the old `macros` name. - -Tests: cargo test -> 6/6 passed. diff --git a/replace-fmtbuf-with-string.txt b/replace-fmtbuf-with-string.txt deleted file mode 100644 index 132ecb4..0000000 --- a/replace-fmtbuf-with-string.txt +++ /dev/null @@ -1,44 +0,0 @@ -Branch: replace-fmtbuf-with-string -Tech debt: D44 — FmtBuf truncates silently and uses from_utf8_unchecked. - -================================================================================ -Commit 1: drop FmtBuf in favour of plain String -================================================================================ -File: compiler/src/modules/fstr.rs - -What was wrong: - format_general formatted floats into a 32-byte stack buffer with two - hazards: - - 1. write_str clamped to `(self.len + bytes.len()).min(self.buf.len())`, - silently dropping any overflow. 
f64::Display normally fits, but - non-finite paths (subnormal denormals near MIN_POSITIVE, certain - "{:?}"-via-Display future changes) could overflow without surfacing - an error. - 2. as_str() called core::str::from_utf8_unchecked on the partially - filled buffer. The truncation in #1 is byte-aligned, not UTF-8 - aware, so a future change that piped a multi-byte char through - here would produce UB at the seam. - - Both hazards exist purely as a micro-optimisation: avoid the - String allocation. f64 formatting already runs the Rust fmt machinery - (which itself allocates indirectly via the float printing - implementation), so saving one tiny String alloc was never the - bottleneck. - -Fix: - Removed the FmtBuf struct entirely. format_general now uses - let mut out = alloc::string::String::with_capacity(32); - let _ = write!(&mut out, "{}", f); - out - with `use core::fmt::Write` brought into scope locally. The 32-byte - preallocation matches the previous stack budget so the allocator hits - zero realloc on the common case. write! into a String is infallible, - so the `let _ =` is preserved (the Result is purely structural). - - No more silent truncation, no more from_utf8_unchecked. - -Documentation: - No .md docs reference FmtBuf or the format_general internals. - -Tests: cargo test -> 6/6 passed. diff --git a/stash-panic-before-trap.txt b/stash-panic-before-trap.txt deleted file mode 100644 index 468e4f2..0000000 --- a/stash-panic-before-trap.txt +++ /dev/null @@ -1,48 +0,0 @@ -Branch: stash-panic-before-trap -Tech debt: D26 (panic_handler portion) — bare wasm32::unreachable() trap discarded the panic message. - -================================================================================ -Commit 1: stash panic message before trapping -================================================================================ -File: compiler/src/main/mod.rs - -What was wrong: - panic_handler at line 41 read: - - fn panic(_: &core::panic::PanicInfo) -> ! 
{ core::arch::wasm32::unreachable() } - - When *any* code on the WASM side panicked — a hand-written invariant, - an underflow, a malformed bytecode reaching an unreachable catch — the - module trapped immediately. The host's edge_take_error returned -1 ("no - message"), so the user saw "the WASM module crashed" with zero context. - Reproducing intermittent panics from production became a guessing game. - -Fix: - Captured `info.message()` into a String, stashed it in ERROR_STASH with - ErrorKind::Runtime (kind=2), then trapped. The host's edge_take_error - now drains it and returns a typed message like "internal panic: index - out of bounds: the len is 4 but the index is 7". - - Defensive considerations: - - alloc::format! itself can panic (allocator OOM). If that happens, - panic_handler re-enters; the stash never gets set; we fall through - to the original unreachable() trap. Net behaviour for the host is - identical to the previous code, so the fix is strictly additive. - - ERROR_STASH may not have been initialised yet (panic during very - early startup). The handler explicitly initialises it on first use, - matching the lazy pattern the rest of mod.rs uses for static state. - - A /* ... */ doc comment in the lexer/parser style records both the - intent and the recursion-on-OOM caveat. - - This addresses the panic_handler half of D26. The companion concern - (replace .expect()/unwrap() in abi_bridge.rs with propagated errors) - is left for a separate branch. - -Documentation: - reference/wasm-abi.md and reference/writing-modules.md include - panic_handler snippets — these are templates for *plugin authors* - writing their own .wasm modules, not for the host's compiler.wasm. - No doc updates required. - -Build & tests: cargo build -p edge-python -> ok; cargo test -> 6/6 passed. 
diff --git a/tighten-resolver-internals.txt b/tighten-resolver-internals.txt deleted file mode 100644 index ae081f7..0000000 --- a/tighten-resolver-internals.txt +++ /dev/null @@ -1,72 +0,0 @@ -Branch: tighten-resolver-internals -Tech debt: D60 — WasmHostResolver mixed package resolution with ABI marshalling and used a magic 32-hop constant inline. - -================================================================================ -Commit 1: pull native-binding closure into abi_bridge and name the hop limit -================================================================================ -Files: - - compiler/src/main/resolver.rs - - compiler/src/main/abi_bridge.rs - -What was wrong: - resolve_canonical (resolver.rs:113-149) carried a 27-line closure that - was pure ABI plumbing: stage Vals as handles, invoke - host_call_native via FFI, drain status into Result, release every - handle. Resolver had to import HeapPool / Val / VmErr / Arc / - put_val / get_val / handles / error_stash / error_from_kind / - ErrorKind / host_call_native — eleven symbols whose only purpose - was to make the closure compile. - - Mixing concerns meant any change to the wire-marshalling rules - (e.g. tightening release ordering, surfacing a different default - error kind) had to be made in the wrong file. Reviewers had to - read the resolver to audit ABI safety. - - Separately, the 32-hop walk-up cap was an inline magic number with - no name. The error message hard-coded "32 hops" twice (the literal - in the if-guard and the literal in the s! formatting), so a future - bump would silently desync. - -Fix: - Added `pub(super) fn make_native_binding(name: String, id: u32) -> NativeBinding` - to abi_bridge.rs. The function owns the closure body. 
resolver.rs's - resolve_canonical Native arm collapses to one line per binding: - - let bindings: Vec<NativeBinding> = funcs.iter() - .map(|(name, id)| make_native_binding(name.clone(), *id)) - .collect(); - - Resolver lost eleven imports (HeapPool, Val, VmErr, Arc, get_val, - handles, put_val, error_stash, error_from_kind, ErrorKind, - host_call_native), and is now visibly a "find the right module" - module — the only unsafe FFI symbol it still touches is - host_fetch_bytes for manifest IO. - - Named the cap as `const MAX_PACKAGES_HOPS: u32 = 32;` and rewrote - the error message to interpolate the constant, so a bump touches - one site: - - s!("packages.json walk-up exceeded ", - int MAX_PACKAGES_HOPS as i64, " hops resolving '", str name, "'") - - Behaviour unchanged: the runtime cap is still 32, the message is - byte-identical to before. - - /* ... */ doc comment in the lexer/parser style records the - motivation for both the cap and the closure relocation. - - This addresses the resolver/ABI separation half of D60 and the - named-constant point. Two follow-ups remain in the cache: - - Replace `manifests()` Vec<(String, Manifest)> with FxHashMap for - O(1) lookup (touches mod.rs static type). - - Split the resolver into ManifestStore + ModuleRegistry + - WasmHostResolver as separate types. - Both are larger structural moves and stay scoped to a separate - branch. - -Documentation: - reference/imports.md mentions "32 hops" verbatim. The constant - still equals 32 and the runtime message still produces "32 hops", - so no doc updates required. - -Build & tests: cargo build -p edge-python -> ok; cargo test -> 6/6 passed. diff --git a/validate-utf8-on-host-input.txt b/validate-utf8-on-host-input.txt deleted file mode 100644 index 2d5743a..0000000 --- a/validate-utf8-on-host-input.txt +++ /dev/null @@ -1,36 +0,0 @@ -Branch: validate-utf8-on-host-input -Tech debt: D3 — from_utf8_unchecked on host-controlled INP buffer is UB if the host writes non-UTF-8 bytes.
- -================================================================================ -Commit 1: validate utf-8 on host input buffer -================================================================================ -File: compiler/src/main/exports.rs - -What was wrong: - Lines 117-124 read INP (a host-written byte buffer) and constructed a - &str via core::str::from_utf8_unchecked. Nothing in the wasm-abi spec - forces the host to validate UTF-8 before writing INP, so a buggy or - hostile host that writes a non-UTF-8 byte sequence triggered immediate - undefined behaviour: every downstream &str method (split, chars, len) - becomes UB on a non-UTF-8 buffer, even before the parse output is ever - inspected. - - This is a pure boundary-validation gap. The cost of from_utf8 vs - from_utf8_unchecked is one linear scan over the buffer — negligible - next to compile + run. - -Fix: - Replaced the unchecked construction with core::str::from_utf8(bytes) - and degraded invalid input to an empty &str via .unwrap_or(""). - Behaviour for valid UTF-8 is identical; invalid bytes now produce an - empty input_buffer instead of UB. Added a /* ... */ comment in the - lexer/parser style explaining the boundary intent. - - Companion debt D2 (the *mut VM<'static> lifetime laundering five lines - below) is structural and lives on its own branch; not touched here. - -Documentation: - reference/builtins.md only mentions input() at the language level; no - UB or boundary discussion to update. - -Tests: cargo test -> 6/6 passed. diff --git a/workspace-dependencies-table.txt b/workspace-dependencies-table.txt deleted file mode 100644 index f5e9793..0000000 --- a/workspace-dependencies-table.txt +++ /dev/null @@ -1,51 +0,0 @@ -Branch: workspace-dependencies-table -Tech debt: D54 — no [workspace.dependencies] table; lol_alloc duplicated. 
- -================================================================================ -Commit 1: hoist third-party versions to workspace.dependencies -================================================================================ -Files: - - Cargo.toml (root) - - compiler/Cargo.toml - - edge-pdk/macros/Cargo.toml - - examples/slugify-mod/Cargo.toml - -What was wrong: - Each crate declared its own version of every third-party dep. The - immediate concrete duplication was lol_alloc 0.4, listed in both - compiler/Cargo.toml and examples/slugify-mod/Cargo.toml. Two real - failure modes: - - 1. A bump applied to one Cargo.toml but missed in the other lets - cargo resolve two copies of lol_alloc into the same lockfile, so - every workspace build pulls both versions. - 2. Workspace-wide bumps (security, MSRV, feature) require N edits - and N reviews instead of one. - - hashbrown, itoa, serde, serde_json, proc-macro2, quote, syn each - exist in only one crate today, but the workspace is a published-shape - artifact (cdylib + rlib + PDK). Any new crate that needed e.g. itoa - would need to either re-declare the version or coincidentally pick - the same one — exactly the situation lol_alloc fell into. - -Fix: - Added a [workspace.dependencies] table to the root Cargo.toml with - every shared third-party dep: - hashbrown (default-features = false) - itoa, lol_alloc, proc-macro2, quote, serde_json - syn (features = ["full"]) - serde (features = ["derive"]) - - Each member crate now uses `dep = { workspace = true }`, with the - one exception that needs an extra feature on top - (compiler dev-dependencies for hashbrown adds ["serde"] via - `{ workspace = true, features = ["serde"] }` — this is the - intended cargo pattern for additive-feature opt-in). - - Added a `#`-style comment in the root Cargo.toml explaining - the intent so a future reader knows where new shared deps go. - -Documentation: - No .md docs reference the workspace dependency layout.
- -Build & tests: cargo build -p edge-python -> ok; cargo test -> 6/6 passed.