diff --git a/Cargo.toml b/Cargo.toml index 886831c..ca4fe87 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,6 +11,19 @@ edition = "2024" license = "MIT OR Apache-2.0" repository = "https://github.com/dylan-sutton-chavez/edge-python/" +# Single source of truth for third-party versions. Members opt in with +# `dep = { workspace = true }` so a bump touches one line workspace-wide +# and the lockfile cannot grow accidental version splits. +[workspace.dependencies] +hashbrown = { version = "0.17", default-features = false } +itoa = "1" +lol_alloc = "0.4" +proc-macro2 = "1" +quote = "1" +syn = { version = "2", features = ["full"] } +serde = { version = "1", features = ["derive"] } +serde_json = "1" + [profile.release] opt-level = "z" lto = true diff --git a/compiler/Cargo.toml b/compiler/Cargo.toml index d55fa0a..eecb416 100644 --- a/compiler/Cargo.toml +++ b/compiler/Cargo.toml @@ -20,18 +20,18 @@ name = "tests" path = "tests/main.rs" [dependencies] -hashbrown = { version = "0.17", default-features = false } -itoa = "1" +hashbrown = { workspace = true } +itoa = { workspace = true } # WASM-only global allocator. Pulled into wasm32 builds automatically; absent # on host builds (where std's allocator is fine and `compiler.wasm` is just a # library artifact, not the runtime target). [target.'cfg(target_arch = "wasm32")'.dependencies] -lol_alloc = "0.4" +lol_alloc = { workspace = true } # Test-only deps for the JSON-driven test runner. None of these are pulled # into the release `compiler.wasm`. 
[dev-dependencies] -hashbrown = { version = "0.17", default-features = false, features = ["serde"] } -serde = { version = "1", features = ["derive"] } -serde_json = "1" +hashbrown = { workspace = true, features = ["serde"] } +serde = { workspace = true } +serde_json = { workspace = true } diff --git a/compiler/README.md b/compiler/README.md index c4751ef..6cc01bf 100644 --- a/compiler/README.md +++ b/compiler/README.md @@ -90,10 +90,11 @@ Mark-and-sweep with roots: operand stack, with-stack, pending yields, event queu │ │ ├── abi_bridge.rs │ │ ├── resolver.rs │ │ └── errors.rs +│ ├── util +│ │ ├── fstr.rs +│ │ ├── fx.rs +│ │ └── sha256.rs │ └── modules -│ ├── fstr.rs -│ ├── fx.rs -│ ├── sha256.rs │ ├── lexer │ │ ├── mod.rs │ │ ├── scan.rs diff --git a/compiler/src/abi.rs b/compiler/src/abi.rs index b76dcba..3dca585 100644 --- a/compiler/src/abi.rs +++ b/compiler/src/abi.rs @@ -7,6 +7,23 @@ Sealed contract for modules. use alloc::{string::String, vec::Vec}; +/* Source-of-truth NaN-boxing layout. Both the wire codec below and + vm::types::Val import from here, so any change touches one site + instead of three. Reserved for the `Sealed contract — v1` set: a + layout change forces a wasm-abi version bump. */ +pub mod nan_box { + pub const QNAN: u64 = 0x7FFC_0000_0000_0000; + pub const SIGN: u64 = 0x8000_0000_0000_0000; + pub const TAG_UNDEF: u64 = QNAN; + pub const TAG_NONE: u64 = QNAN | 1; + pub const TAG_TRUE: u64 = QNAN | 2; + pub const TAG_FALSE: u64 = QNAN | 3; + pub const TAG_INT: u64 = QNAN | SIGN; + pub const TAG_HEAP: u64 = QNAN | 4; + /* Low-48-bit integer payload mask: a two's-complement value whose sign bit is bit 47 (47 magnitude bits). */ + pub const INT_PAYLOAD_MASK: u64 = 0x0000_FFFF_FFFF_FFFF; +} + /* Op codes (sealed) */ #[allow(non_camel_case_types)] @@ -184,27 +201,22 @@ pub enum EncodeRequest<'a> { Invalid, } -// Maps (tag, bytes) to EncodeRequest. NaN-boxing layout is sealed here; changes require ABI bump. +// Maps (tag, bytes) to EncodeRequest.
NaN-boxing layout is sealed in `nan_box`; changes require ABI bump. pub fn classify_encode(tag: u32, bytes: &[u8]) -> EncodeRequest<'_> { - /* NaN-boxing constants; must match host Val impl. */ - const QNAN: u64 = 0x7FFC_0000_0000_0000; - const TAG_NONE_BITS: u64 = QNAN | 1; - const TAG_TRUE_BITS: u64 = QNAN | 2; - const TAG_FALSE_BITS: u64 = QNAN | 3; - const TAG_INT_BITS: u64 = QNAN | 0x8000_0000_0000_0000; + use nan_box::*; match Tag::from_u32(tag) { - Some(Tag::None) => EncodeRequest::Direct(TAG_NONE_BITS), + Some(Tag::None) => EncodeRequest::Direct(TAG_NONE), Some(Tag::Bool) => { let b = !bytes.is_empty() && bytes[0] != 0; - EncodeRequest::Direct(if b { TAG_TRUE_BITS } else { TAG_FALSE_BITS }) + EncodeRequest::Direct(if b { TAG_TRUE } else { TAG_FALSE }) } Some(Tag::Int) => { if bytes.len() != 8 { return EncodeRequest::Invalid; } let mut buf = [0u8; 8]; buf.copy_from_slice(bytes); let i = i64::from_le_bytes(buf); - EncodeRequest::Direct(TAG_INT_BITS | (i as u64 & 0x0000_FFFF_FFFF_FFFF)) + EncodeRequest::Direct(TAG_INT | (i as u64 & INT_PAYLOAD_MASK)) } Some(Tag::Float) => { if bytes.len() != 8 { return EncodeRequest::Invalid; } @@ -245,10 +257,7 @@ impl PrimitiveBytes { // Classifies Val bits into Primitive/Heap/Invalid; Heap means host must read from HeapPool. pub fn classify_decode(val_bits: u64) -> DecodeBits { - /* Same NaN-boxing constants as classify_encode. */ - const QNAN: u64 = 0x7FFC_0000_0000_0000; - const SIGN: u64 = 0x8000_0000_0000_0000; - const TAG_INT: u64 = QNAN | SIGN; + use nan_box::*; // Float: any non-QNAN-tagged pattern. if (val_bits & QNAN) != QNAN { @@ -259,7 +268,7 @@ pub fn classify_decode(val_bits: u64) -> DecodeBits { } // Int: QNAN|SIGN with payload. 
if (val_bits & (QNAN | SIGN)) == TAG_INT { - let raw = (val_bits & 0x0000_FFFF_FFFF_FFFF) as i64; + let raw = (val_bits & INT_PAYLOAD_MASK) as i64; let sign_extended = (raw << 16) >> 16; return DecodeBits::Primitive { tag: Tag::Int as u32, @@ -269,17 +278,17 @@ pub fn classify_decode(val_bits: u64) -> DecodeBits { // Singletons and heap handles. let lower = val_bits & 0xF; if (val_bits & QNAN) == QNAN && (val_bits & SIGN) == 0 { - if val_bits == QNAN | 1 { + if val_bits == TAG_NONE { return DecodeBits::Primitive { tag: Tag::None as u32, bytes: PrimitiveBytes::None, }; } - if val_bits == QNAN | 2 { + if val_bits == TAG_TRUE { return DecodeBits::Primitive { tag: Tag::Bool as u32, bytes: PrimitiveBytes::Bool(1), }; } - if val_bits == QNAN | 3 { + if val_bits == TAG_FALSE { return DecodeBits::Primitive { tag: Tag::Bool as u32, bytes: PrimitiveBytes::Bool(0), }; diff --git a/compiler/src/lib.rs b/compiler/src/lib.rs index 5c4ac88..e491648 100644 --- a/compiler/src/lib.rs +++ b/compiler/src/lib.rs @@ -8,12 +8,19 @@ pub mod abi; #[cfg(target_arch = "wasm32")] pub mod main; -pub mod modules { +/* Internal helpers shared across the compiler — not Edge Python language + modules. Kept separate from `modules/` (which contains lexer/parser/vm/ + packages — runtime components) so contributors don't mistake utility + code for built-in stdlib. 
*/ +pub mod util { pub mod fx; + pub mod fstr; + pub mod sha256; +} + +pub mod modules { pub mod lexer; pub mod vm; pub mod parser; pub mod packages; - pub mod fstr; - pub mod sha256; } \ No newline at end of file diff --git a/compiler/src/main/abi_bridge.rs b/compiler/src/main/abi_bridge.rs index 2a2ced4..516f317 100644 --- a/compiler/src/main/abi_bridge.rs +++ b/compiler/src/main/abi_bridge.rs @@ -1,12 +1,13 @@ -use crate::abi::{classify_decode, classify_encode, DecodeBits, EncodeRequest, Op, PrimitiveBytes, TAG_INVALID}; +use crate::abi::{classify_decode, classify_encode, DecodeBits, EncodeRequest, ErrorKind, Op, PrimitiveBytes, TAG_INVALID}; use crate::modules::vm::types::{HeapObj, Val, VmErr}; use crate::modules::vm::handlers::methods::{lookup_method, dispatch_method}; -use alloc::{rc::Rc, string::{String, ToString}, vec, vec::Vec}; +use crate::modules::packages::NativeBinding; +use alloc::{rc::Rc, string::{String, ToString}, sync::Arc, vec, vec::Vec}; use core::cell::RefCell; use crate::s; -use super::{error_stash, get_val, handles, put_val, with_recv, with_vm}; -use super::errors::stash_error; +use super::{error_stash, get_val, handles, host_call_native, put_val, with_recv, with_vm}; +use super::errors::{error_from_kind, stash_error}; // Universal dispatch. Returns 0 + handle in `*out_handle`, or 1 + stashed error. #[unsafe(no_mangle)] @@ -47,7 +48,10 @@ fn dispatch_call(recv_h: u32, name: &str, args: Vec) -> Result if vm.stack.len() != stack_before + 1 { return Err(VmErr::Runtime("edge_op call: method left no result")); } - Ok(vm.stack.pop().unwrap()) + /* The length check above guarantees a value is present; ok_or + keeps the FFI boundary panic-free if a future change drops + the invariant. 
*/ + vm.stack.pop().ok_or(VmErr::Runtime("edge_op call: stack drained mid-dispatch")) }) } @@ -124,7 +128,7 @@ fn dispatch_get_item(recv_h: u32, args: &[Val]) -> Result { if vm.stack.len() != stack_before + 1 { return Err(VmErr::Runtime("edge_op get_item: get_item left no result")); } - Ok(vm.stack.pop().unwrap()) + vm.stack.pop().ok_or(VmErr::Runtime("edge_op get_item: stack drained mid-dispatch")) }) } @@ -211,7 +215,7 @@ fn dispatch_iter_next(recv_h: u32) -> Result { }) } -// Bootstrap decoder: writes tag to `*out_tag`, bytes to `dst[..dst_max]`. +// Bootstrap encoder: classifies (tag, bytes) into a Val handle; returns 0 on Invalid. #[unsafe(no_mangle)] pub unsafe extern "C" fn host_edge_encode(tag: u32, ptr: *const u8, len: u32) -> u32 { let bytes = if len == 0 || ptr.is_null() { @@ -301,8 +305,12 @@ pub unsafe extern "C" fn host_edge_take_error(out_kind: *mut u32, dst: *mut u8, None => return -1, }; if len > dst_max as usize { return -(len as i32); } - // Buffer fits — drain and copy. - let (_, msg) = stash.take().expect("peek returned Some"); + // Buffer fits — drain and copy. Treat a None on take() as "race lost the + // peek/take window" and surface it as no-pending-error rather than panic + // across the FFI boundary (the .expect() previously here violated + // "panics never cross FFI"; in single-threaded WASM this is unreachable + // today but we don't want a future scheduler change to weaponise it). + let Some((_, msg)) = stash.take() else { return -1; }; let bytes = msg.as_bytes(); unsafe { *out_kind = kind; @@ -312,3 +320,45 @@ pub unsafe extern "C" fn host_edge_take_error(out_kind: *mut u32, dst: *mut u8, } bytes.len() as i32 } + +/* Builds a NativeBinding whose closure translates a VM CallExtern call + into the universal wire ABI: stage args as handles, invoke + `host_call_native(id, ...)`, drain status into Result, and + release every handle (including out_handle) before returning. 
+ + Lives here — not in resolver.rs — because the body is pure ABI + marshalling. The resolver only needs to ask "give me a binding for + (name, id)" and forget about handle plumbing. `pure: false` because + any guest call may have side effects through the host. */ +pub(super) fn make_native_binding(name: String, id: u32) -> NativeBinding { + let closure = move |_: &mut crate::modules::vm::types::HeapPool, args: &[Val]| -> Result { + /* 1. Register args as handles the guest will see. */ + let argv: Vec = args.iter().map(|v| put_val(*v)).collect(); + let mut out_handle: u32 = 0; + + /* 2. Call guest export through the host shim. */ + let status = unsafe { + host_call_native( + id, + argv.as_ptr(), argv.len() as u32, + &mut out_handle as *mut u32, + ) + }; + + /* 3. Translate status/out_handle into Result. Read result + BEFORE releasing — argv release frees the slots `result` may + reference if the guest returned one of its inputs. */ + if status != 0 { + for h in &argv { handles().release(*h); } + let (kind, msg) = error_stash().take() + .unwrap_or((ErrorKind::Runtime as u32, String::from("native call failed"))); + return Err(error_from_kind(kind, msg)); + } + let result = get_val(out_handle) + .ok_or(VmErr::Runtime("native returned invalid handle"))?; + for h in &argv { handles().release(*h); } + handles().release(out_handle); + Ok(result) + }; + NativeBinding { name, func: Arc::new(closure), pure: false } +} diff --git a/compiler/src/main/exports.rs b/compiler/src/main/exports.rs index 42dca41..089a22e 100644 --- a/compiler/src/main/exports.rs +++ b/compiler/src/main/exports.rs @@ -23,6 +23,18 @@ pub unsafe extern "C" fn wasm_alloc(size: u32) -> *mut u8 { Box::into_raw(v.into_boxed_slice()) as *mut u8 } +/* Releases a buffer previously returned by `wasm_alloc`. The host MUST pass + the exact same `size` it requested; mismatched lengths reconstruct the + wrong Box layout. Calling with a null pointer or `size == 0` is a no-op. 
*/ +#[unsafe(no_mangle)] +pub unsafe extern "C" fn wasm_free(ptr: *mut u8, size: u32) { + if ptr.is_null() || size == 0 { return; } + unsafe { + let slice = core::slice::from_raw_parts_mut(ptr, size as usize); + let _ = Box::from_raw(slice as *mut [u8]); + } +} + #[unsafe(no_mangle)] pub unsafe extern "C" fn register_code_module( spec_ptr: *const u8, spec_len: u32, @@ -68,12 +80,20 @@ pub unsafe extern "C" fn reset_modules() { error_stash().clear(); } -#[unsafe(no_mangle)] -pub unsafe extern "C" fn extract_imports(len: usize) -> usize { +/* Reads up to SZ bytes from the host-owned SRC buffer and validates UTF-8. + `len` is capped so the slice never extends past the buffer; callers decide + how to surface a UTF-8 failure (silent vs. user-facing error). */ +unsafe fn read_src(len: usize) -> Result<&'static str, core::str::Utf8Error> { let len = len.min(SZ); - let src = match core::str::from_utf8(unsafe { + let bytes = unsafe { core::slice::from_raw_parts(core::ptr::addr_of!(SRC) as *const u8, len) - }) { + }; + core::str::from_utf8(bytes) +} + +#[unsafe(no_mangle)] +pub unsafe extern "C" fn extract_imports(len: usize) -> usize { + let src = match unsafe { read_src(len) } { Ok(s) => s, Err(_) => return unsafe { write_out("") }, }; @@ -84,10 +104,7 @@ pub unsafe extern "C" fn extract_imports(len: usize) -> usize { #[unsafe(no_mangle)] pub unsafe extern "C" fn run(len: usize) -> usize { - let len = len.min(SZ); - let src = match core::str::from_utf8(unsafe { - core::slice::from_raw_parts(core::ptr::addr_of!(SRC) as *const u8, len) - }) { + let src = match unsafe { read_src(len) } { Ok(s) => s, Err(e) => return unsafe { write_out(&s!("input rejected: invalid utf-8 at byte ", int e.valid_up_to())) @@ -116,9 +133,12 @@ pub unsafe extern "C" fn run(len: usize) -> usize { vm.strict_input = true; let inp_len = unsafe { INP_LEN }; if inp_len > 0 { - let inp = unsafe { core::str::from_utf8_unchecked( + /* Host-supplied buffer; validate UTF-8 to keep the FFI boundary safe. 
+ Invalid bytes degrade to an empty input rather than UB. */ + let bytes = unsafe { core::slice::from_raw_parts(core::ptr::addr_of!(INP) as *const u8, inp_len) - )}; + }; + let inp = core::str::from_utf8(bytes).unwrap_or(""); vm.input_buffer = inp.split('\n').map(alloc::string::String::from).collect(); unsafe { INP_LEN = 0; } } diff --git a/compiler/src/main/mod.rs b/compiler/src/main/mod.rs index 42872c9..a8dd59e 100644 --- a/compiler/src/main/mod.rs +++ b/compiler/src/main/mod.rs @@ -37,8 +37,22 @@ A ~3,000-alloc perceptron run pays ~600 ms; bumping cuts it to ~50 grows. #[global_allocator] static A: AssumeSingleThreaded = unsafe { AssumeSingleThreaded::new(LeakingAllocator::new()) }; +/* Best-effort panic-to-stash: the host's edge_take_error then sees a typed + message instead of an opaque WASM trap. If the format allocation itself + re-enters this handler we fall through to unreachable(); the host trap + behaviour is unchanged from the previous bare implementation. */ #[panic_handler] -fn panic(_: &core::panic::PanicInfo) -> ! { core::arch::wasm32::unreachable() } +fn panic(info: &core::panic::PanicInfo) -> ! 
{ + let msg = alloc::format!("internal panic: {}", info.message()); + unsafe { + let p = core::ptr::addr_of_mut!(ERROR_STASH); + if (*p).is_none() { *p = Some(ErrorStash::new()); } + if let Some(stash) = (*p).as_mut() { + stash.set(crate::abi::ErrorKind::Runtime as u32, msg); + } + } + core::arch::wasm32::unreachable() +} pub(super) const SZ: usize = 1 << 20; #[allow(non_upper_case_globals)] diff --git a/compiler/src/main/resolver.rs b/compiler/src/main/resolver.rs index 9e8bdb2..cb0b341 100644 --- a/compiler/src/main/resolver.rs +++ b/compiler/src/main/resolver.rs @@ -1,12 +1,15 @@ -use crate::modules::vm::types::{HeapPool, Val, VmErr}; use crate::modules::packages::{NativeBinding, Resolved, Resolver, parse_manifest, walk_up_dirs, dir_of, join_relative}; -use crate::modules::fx::FxHashSet; -use alloc::{boxed::Box, string::{String, ToString}, sync::Arc, vec::Vec}; +use crate::util::fx::FxHashSet; +use alloc::{boxed::Box, string::{String, ToString}, vec::Vec}; use crate::s; -use super::{ModuleEntry, error_stash, get_val, handles, host_fetch_bytes, manifests, put_val, registry}; -use super::errors::error_from_kind; -use crate::abi::ErrorKind; +use super::{ModuleEntry, host_fetch_bytes, manifests, registry}; +use super::abi_bridge::make_native_binding; + +/* Hard cap on packages.json `extends` chain length. Prevents an attacker- + crafted manifest from looping the resolver indefinitely; 32 is well above + any sane real-world workspace depth. 
*/ +const MAX_PACKAGES_HOPS: u32 = 32; pub(super) struct WasmHostResolver { pub(super) dir: String } @@ -47,8 +50,11 @@ impl WasmHostResolver { let mut search_dir = start_dir.to_string(); let mut hops: u32 = 0; loop { - if hops > 32 { - return Err(s!("packages.json walk-up exceeded 32 hops resolving '", str name, "'")); + if hops > MAX_PACKAGES_HOPS { + return Err(s!( + "packages.json walk-up exceeded ", + int MAX_PACKAGES_HOPS as i64, + " hops resolving '", str name, "'")); } hops += 1; @@ -111,42 +117,9 @@ impl WasmHostResolver { canonical: spec.to_string(), }), ModuleEntry::Native(funcs) => { - let bindings: Vec = funcs.iter().map(|(name, id)| { - let id = *id; - // Translate VM CallExtern into the universal ABI wire shape. - let closure = move |_: &mut HeapPool, args: &[Val]| -> Result - { - // 1. Register args as handles. - let argv: Vec = args.iter().map(|v| put_val(*v)).collect(); - let mut out_handle: u32 = 0; - - // 2. Call guest export through the host shim. - let status = unsafe { - super::host_call_native( - id, - argv.as_ptr(), argv.len() as u32, - &mut out_handle as *mut u32, - ) - }; - - // 3. Translate status/out_handle into Result. Read result BEFORE releasing — order matters. 
- if status != 0 { - for h in &argv { handles().release(*h); } - let (kind, msg) = error_stash().take().unwrap_or((ErrorKind::Runtime as u32, String::from("native call failed"))); - return Err(error_from_kind(kind, msg)); - } - let result = get_val(out_handle) - .ok_or(VmErr::Runtime("native returned invalid handle"))?; - for h in &argv { handles().release(*h); } - handles().release(out_handle); - Ok(result) - }; - NativeBinding { - name: name.clone(), - func: Arc::new(closure), - pure: false, - } - }).collect(); + let bindings: Vec = funcs.iter() + .map(|(name, id)| make_native_binding(name.clone(), *id)) + .collect(); Ok(Resolved::Native { bindings, canonical: spec.to_string(), diff --git a/compiler/src/modules/packages/manifest.rs b/compiler/src/modules/packages/manifest.rs index ec9ce20..214b84a 100644 --- a/compiler/src/modules/packages/manifest.rs +++ b/compiler/src/modules/packages/manifest.rs @@ -1,7 +1,7 @@ use alloc::string::{String, ToString}; use crate::s; -use crate::modules::fx::FxHashMap; +use crate::util::fx::FxHashMap; /* Parsed `packages.json`. `imports` maps bare names to specs; `extends` inherits another manifest's imports when a name isn't local. 
*/ #[derive(Clone)] diff --git a/compiler/src/modules/packages/mod.rs b/compiler/src/modules/packages/mod.rs index e55f3c1..5b8f8de 100644 --- a/compiler/src/modules/packages/mod.rs +++ b/compiler/src/modules/packages/mod.rs @@ -65,7 +65,7 @@ pub fn parse_integrity(spec: &str) -> Result<(&str, Option<[u8; 32]>), String> { "sha256 fragment must be 64 hex chars in '", str spec, "'; got ", int hex.len() as i64)); } - let hash = crate::modules::sha256::hex_decode_32(hex).ok_or_else(|| s!( + let hash = crate::util::sha256::hex_decode_32(hex).ok_or_else(|| s!( "invalid hex in sha256 fragment of '", str spec, "'"))?; Ok((url, Some(hash))) } @@ -88,11 +88,6 @@ impl Default for Box { fn default() -> Self { Box::new(NoopResolver) } } -/* Re-exports core types; hosts get trait, enums, binding, and default resolver via glob import. */ -pub use NativeBinding as Binding; -pub use Resolved as ResolvedModule; -pub use NoopResolver as Default_; - /* Converts public NativeBinding into internal ExternFn; two structs separate host API from VM storage. 
*/ pub(crate) fn binding_to_extern(b: &NativeBinding) -> crate::modules::vm::types::ExternFn { crate::modules::vm::types::ExternFn { diff --git a/compiler/src/modules/parser/expr.rs b/compiler/src/modules/parser/expr.rs index 21f67c7..5095d2b 100644 --- a/compiler/src/modules/parser/expr.rs +++ b/compiler/src/modules/parser/expr.rs @@ -280,7 +280,7 @@ impl<'src, I: Iterator> Parser<'src, I> { }; match parsed { Some(v) => self.emit_const(Value::Int(v)), - None => self.error("integer literal too large for 47-bit Val (max \u{00b1}140737488355327)"), + None => self.error("integer literal too large to represent"), } } @@ -387,7 +387,7 @@ impl<'src, I: Iterator> Parser<'src, I> { s.chunk.emit(OpCode::ReturnValue, 0); }); - let param_slots: crate::modules::fx::FxHashSet = params.iter().map(|p| s!(str p.trim_start_matches('*'), "_0")).collect(); + let param_slots: crate::util::fx::FxHashSet = params.iter().map(|p| s!(str p.trim_start_matches('*'), "_0")).collect(); for name in &body.names { if !param_slots.contains(name.as_str()) { self.chunk.push_name(name); diff --git a/compiler/src/modules/parser/imports.rs b/compiler/src/modules/parser/imports.rs index 7f12e43..5e064ec 100644 --- a/compiler/src/modules/parser/imports.rs +++ b/compiler/src/modules/parser/imports.rs @@ -8,8 +8,8 @@ use super::Parser; use super::types::{Diagnostic, ImportEntry, ImportKind, OpCode, SSAChunk, parse_string, ssa_strip}; use crate::modules::lexer::{Token, TokenType, lex}; use crate::modules::packages::{Resolved, binding_to_extern, parse_integrity}; -use crate::modules::sha256::{sha256, hex_encode}; -use crate::modules::fx::FxHashSet; +use crate::util::sha256::{sha256, hex_encode}; +use crate::util::fx::FxHashSet; use alloc::{string::{String, ToString}, vec::Vec}; diff --git a/compiler/src/modules/parser/literals.rs b/compiler/src/modules/parser/literals.rs index 9d54103..da3ef24 100644 --- a/compiler/src/modules/parser/literals.rs +++ b/compiler/src/modules/parser/literals.rs @@ -5,7 +5,7 @@ 
use super::types::builtin; use super::types::{OpCode, Value, SSAChunk, Instruction}; use crate::modules::lexer::{Token, TokenType, utf8_char_len}; -use crate::modules::fx::FxHashMap as HashMap; +use crate::util::fx::FxHashMap as HashMap; use alloc::{string::{String, ToString}, vec::Vec}; @@ -437,7 +437,7 @@ impl<'src, I: Iterator> Parser<'src, I> { let body = self.compile_body(&params); // Propagate free names to parent chunk so nested defs capture grandparent vars. - let param_slots: crate::modules::fx::FxHashSet = params.iter() + let param_slots: crate::util::fx::FxHashSet = params.iter() .map(|p| s!(str p.trim_start_matches(['*', '~']), "_0")).collect(); for name in &body.names { if !param_slots.contains(name.as_str()) { diff --git a/compiler/src/modules/parser/mod.rs b/compiler/src/modules/parser/mod.rs index 420c687..76edcef 100644 --- a/compiler/src/modules/parser/mod.rs +++ b/compiler/src/modules/parser/mod.rs @@ -10,7 +10,7 @@ pub use types::*; use crate::s; use crate::modules::lexer::{Token, TokenType}; -use crate::modules::fx::FxHashMap as HashMap; +use crate::util::fx::FxHashMap as HashMap; use crate::modules::packages::{Resolver, NoopResolver}; use alloc::{boxed::Box, string::{String, ToString}, vec::Vec}; diff --git a/compiler/src/modules/parser/types.rs b/compiler/src/modules/parser/types.rs index 396a288..6d56d4c 100644 --- a/compiler/src/modules/parser/types.rs +++ b/compiler/src/modules/parser/types.rs @@ -1,5 +1,5 @@ use crate::s; -use crate::modules::fx::FxHashMap as HashMap; +use crate::util::fx::FxHashMap as HashMap; use crate::modules::vm::types::ExternFn; use alloc::{string::{String, ToString}, vec, vec::Vec}; @@ -355,9 +355,16 @@ impl Diagnostic { } } +/* Scan only the prefix chars before the opening quote; the body itself may legally contain 'r'/'R'.
*/ +fn has_raw_prefix(s: &str) -> bool { + s.bytes() + .take_while(|b| !matches!(b, b'"' | b'\'')) + .any(|b| matches!(b, b'r' | b'R')) +} + // Strip prefix + quotes and unescape (skipped for raw strings). pub(super) fn parse_string(s: &str) -> String { - let is_raw = s.contains('r') || s.contains('R'); + let is_raw = has_raw_prefix(s); let s = s.trim_start_matches(|c: char| "bBrRuU".contains(c)); let inner = if s.starts_with("\"\"\"") || s.starts_with("'''") { &s[3..s.len() - 3] @@ -370,7 +377,7 @@ pub(super) fn parse_string(s: &str) -> String { /* Parses b"..." to raw bytes: non-ASCII pass through; \xHH=single byte; \u/\U/\N rejected. */ pub(super) fn parse_bytes_literal(s: &str) -> alloc::vec::Vec { let bytes = s.as_bytes(); - let is_raw = s.contains('r') || s.contains('R'); + let is_raw = has_raw_prefix(s); // Skip b/B/r/R prefix chars. let mut i = 0; while i < bytes.len() && matches!(bytes[i], b'b' | b'B' | b'r' | b'R') { @@ -395,6 +402,10 @@ pub(super) fn parse_bytes_literal(s: &str) -> alloc::vec::Vec { b'n' => { out.push(b'\n'); j += 2; } b't' => { out.push(b'\t'); j += 2; } b'r' => { out.push(b'\r'); j += 2; } + b'a' => { out.push(0x07); j += 2; } + b'b' => { out.push(0x08); j += 2; } + b'f' => { out.push(0x0C); j += 2; } + b'v' => { out.push(0x0B); j += 2; } b'\\' => { out.push(b'\\'); j += 2; } b'\'' => { out.push(b'\''); j += 2; } b'"' => { out.push(b'"'); j += 2; } @@ -434,6 +445,10 @@ fn unescape(s: &str) -> String { Some('n') => out.push('\n'), Some('t') => out.push('\t'), Some('r') => out.push('\r'), + Some('a') => out.push('\u{07}'), + Some('b') => out.push('\u{08}'), + Some('f') => out.push('\u{0C}'), + Some('v') => out.push('\u{0B}'), Some('\\') => out.push('\\'), Some('\'') => out.push('\''), Some('"') => out.push('"'), diff --git a/compiler/src/modules/vm/builtins/attr.rs b/compiler/src/modules/vm/builtins/attr.rs index c4ed16a..5ff433d 100644 --- a/compiler/src/modules/vm/builtins/attr.rs +++ b/compiler/src/modules/vm/builtins/attr.rs @@ 
-124,8 +124,8 @@ impl<'a> VM<'a> { &mut self, chunk: &crate::modules::parser::SSAChunk, slots: &[Val], ) -> Result<(), VmErr> { // Builtin/type/module pairs from self.globals, deduped to bare names. - let mut out: crate::modules::fx::FxHashMap = - crate::modules::fx::FxHashMap::default(); + let mut out: crate::util::fx::FxHashMap = + crate::util::fx::FxHashMap::default(); for (k, v) in self.globals.iter() { // Drop SSA-mirrors (`x_0`, `x_1`); keep canonical bare name. if let Some((bare, suf)) = k.rsplit_once('_') @@ -180,8 +180,8 @@ impl<'a> VM<'a> { &mut self, chunk: &crate::modules::parser::SSAChunk, slots: &[Val], ) -> Result<(), VmErr> { // Map bare-name -> (best version, val) so we keep only the latest. - let mut latest: crate::modules::fx::FxHashMap = - crate::modules::fx::FxHashMap::default(); + let mut latest: crate::util::fx::FxHashMap = + crate::util::fx::FxHashMap::default(); for (i, name) in chunk.names.iter().enumerate() { let v = match slots.get(i) { Some(v) if !v.is_undef() => *v, diff --git a/compiler/src/modules/vm/builtins/container.rs b/compiler/src/modules/vm/builtins/container.rs index ac6d46b..f8033f0 100644 --- a/compiler/src/modules/vm/builtins/container.rs +++ b/compiler/src/modules/vm/builtins/container.rs @@ -1,6 +1,6 @@ use core::cell::RefCell; use alloc::{rc::Rc, string::String, vec::Vec}; -use crate::modules::fx::FxHashSet as HashSet; +use crate::util::fx::FxHashSet as HashSet; use super::super::VM; use super::super::types::*; diff --git a/compiler/src/modules/vm/builtins/identity.rs b/compiler/src/modules/vm/builtins/identity.rs index 5b39437..4131969 100644 --- a/compiler/src/modules/vm/builtins/identity.rs +++ b/compiler/src/modules/vm/builtins/identity.rs @@ -33,7 +33,7 @@ impl<'a> VM<'a> { pub fn call_hash(&mut self) -> Result<(), VmErr> { use core::hash::{Hash, Hasher}; let o = self.pop()?; - let mut h = crate::modules::fx::FxHasher::default(); + let mut h = crate::util::fx::FxHasher::default(); if o.is_int() { 
o.as_int().hash(&mut h); } else if o.is_float() { o.as_float().to_bits().hash(&mut h); } else if o.is_bool() { o.as_bool().hash(&mut h); } diff --git a/compiler/src/modules/vm/cache.rs b/compiler/src/modules/vm/cache.rs index 0709f84..6898570 100644 --- a/compiler/src/modules/vm/cache.rs +++ b/compiler/src/modules/vm/cache.rs @@ -153,6 +153,19 @@ fn hash_args(args: &[Val]) -> u64 { h } +/* True when every arg is value-stable across calls. Mutable containers + (List, Dict, Set, Instance) hash by raw heap-index bits, so a caller + that mutates the same container between two calls produces a stale + cache hit. Only memoize when every arg is byte-by-byte immutable. */ +fn args_memoizable(args: &[Val], heap: &super::types::HeapPool) -> bool { + use super::types::HeapObj; + args.iter().all(|v| { + if !v.is_heap() { return true; } + !matches!(heap.get(*v), HeapObj::List(_) | HeapObj::Dict(_) + | HeapObj::Set(_) | HeapObj::Instance(..)) + }) +} + // Indexed by `fi` (function id, dense from 0..N). Vec gives O(1) lookup // without a HashMap monomorphization. pub struct Templates { slots: Vec> } @@ -168,6 +181,7 @@ impl Templates { } pub fn record(&mut self, fi: usize, args: &[Val], result: Val, heap: &super::types::HeapPool) { + if !args_memoizable(args, heap) { return; } if self.slots.len() <= fi { self.slots.resize_with(fi + 1, Vec::new); } let h = hash_args(args); let v = &mut self.slots[fi]; diff --git a/compiler/src/modules/vm/dispatch.rs b/compiler/src/modules/vm/dispatch.rs index 4233317..cdb10bc 100644 --- a/compiler/src/modules/vm/dispatch.rs +++ b/compiler/src/modules/vm/dispatch.rs @@ -7,15 +7,24 @@ use super::{ExceptionFrame, VM, handlers}; use super::types::*; use super::cache::{OpcodeCache, FastOp}; +/* Three-way result of a fast-path attempt; see exec_fast for semantics. */ +enum FastOutcome { Done, TypeMiss, Overflow } + impl<'a> VM<'a> { - /* Inline-cache fast path. 
Peeks the stack and only pops on success; - returns Ok(false) with the stack untouched on a type-guard miss - so the caller can fall back to the generic handler and deopt the IC. */ + /* Inline-cache fast path. Peeks the stack and only pops on success. + Three outcomes: + Done — the op ran inline; stack consumed and result pushed. + TypeMiss — operands didn't match the speculation; deopt the IC. + Overflow — types matched but the result can't be represented (int + overflow, division by zero); the slow handler will + raise the proper Python exception. The IC stays warm + because the speculation was correct: the op IS hot + on these types, only this *one* input pair fell out. */ #[inline] - fn exec_fast(&mut self, fast: FastOp) -> Result { + fn exec_fast(&mut self, fast: FastOp) -> Result { let len = self.stack.len(); - if len < 2 { return Ok(false); } + if len < 2 { return Ok(FastOutcome::TypeMiss); } let a = self.stack[len - 2]; let b = self.stack[len - 1]; @@ -25,28 +34,28 @@ impl<'a> VM<'a> { FastOp::AddInt if a.is_int() && b.is_int() => { match a.as_int().checked_add(b.as_int()).and_then(Val::int_checked) { Some(v) => v, - None => return Ok(false), + None => return Ok(FastOutcome::Overflow), } } FastOp::SubInt if a.is_int() && b.is_int() => { match a.as_int().checked_sub(b.as_int()).and_then(Val::int_checked) { Some(v) => v, - None => return Ok(false), + None => return Ok(FastOutcome::Overflow), } } FastOp::MulInt if a.is_int() && b.is_int() => { let r = a.as_int() as i128 * b.as_int() as i128; - if r >= Val::INT_MIN as i128 && r <= Val::INT_MAX as i128 { Val::int(r as i64) } else { return Ok(false); } + if r >= Val::INT_MIN as i128 && r <= Val::INT_MAX as i128 { Val::int(r as i64) } else { return Ok(FastOutcome::Overflow); } } FastOp::MulFloat if a.is_float() && b.is_float() => Val::float(a.as_float() * b.as_float()), FastOp::ModInt if a.is_int() && b.is_int() => { let bv = b.as_int(); - if bv == 0 { return Ok(false); } + if bv == 0 { return 
Ok(FastOutcome::Overflow); } Val::int(((a.as_int() % bv) + bv) % bv) } FastOp::FloorDivInt if a.is_int() && b.is_int() => { let bv = b.as_int(); - if bv == 0 { return Ok(false); } + if bv == 0 { return Ok(FastOutcome::Overflow); } Val::int(a.as_int().div_euclid(bv)) } @@ -61,7 +70,7 @@ impl<'a> VM<'a> { FastOp::AddStr | FastOp::EqStr if a.is_heap() && b.is_heap() => { let (sa, sb) = match (self.heap.get(a), self.heap.get(b)) { (HeapObj::Str(x), HeapObj::Str(y)) => (x.clone(), y.clone()), - _ => return Ok(false), + _ => return Ok(FastOutcome::TypeMiss), }; match fast { FastOp::AddStr => { @@ -73,12 +82,12 @@ impl<'a> VM<'a> { } } - _ => return Ok(false), + _ => return Ok(FastOutcome::TypeMiss), }; self.stack.truncate(len - 2); self.push(result); - Ok(true) + Ok(FastOutcome::Done) } /* Main dispatch loop. Walks the fused instruction stream (LoadAttr+Call @@ -368,8 +377,13 @@ impl<'a> VM<'a> { | OpCode::Eq | OpCode::Lt | OpCode::NotEq | OpCode::Gt | OpCode::LtEq | OpCode::GtEq => { if let Some(fast) = cache.get_fast(rip) { - if self.exec_fast(fast)? { return Ok(None); } - cache.invalidate(rip); + match self.exec_fast(fast)? { + FastOutcome::Done => return Ok(None), + /* Speculation was right (matching types); the slow handler + will raise the proper Python exception. Keep the IC. */ + FastOutcome::Overflow => {} + FastOutcome::TypeMiss => cache.invalidate(rip), + } } if matches!(ins.opcode, OpCode::Eq | OpCode::Lt | OpCode::NotEq | OpCode::Gt | OpCode::LtEq | OpCode::GtEq) @@ -652,9 +666,6 @@ impl<'a> VM<'a> { // Emitted by `break` inside a for-loop to drop the abandoned // iterator so the surrounding for-iter reads from its own iter. 
OpCode::PopIter => { self.iter_stack.pop(); } - OpCode::MakeClass | OpCode::StoreAttr => { - return Err(cold_runtime("MakeClass/StoreAttr must be in main dispatch")); - } _ => return Err(cold_runtime("unexpected opcode in generic dispatch")), } Ok(()) diff --git a/compiler/src/modules/vm/handlers/format.rs b/compiler/src/modules/vm/handlers/format.rs index 2bda5e4..845db5c 100644 --- a/compiler/src/modules/vm/handlers/format.rs +++ b/compiler/src/modules/vm/handlers/format.rs @@ -426,7 +426,7 @@ pub fn display_inline(v: Val, heap: &HeapPool) -> String { } if v.is_bool() { return (if v.as_bool() { "True" } else { "False" }).to_string(); } if v.is_none() { return String::from("None"); } - if v.is_float() { return crate::modules::fstr::format_f64(v.as_float()); } + if v.is_float() { return crate::util::fstr::format_f64(v.as_float()); } if v.is_heap() && let HeapObj::Str(s) = heap.get(v) { return s.clone(); } /* Fall back to nothing — caller should use VM::display for full coverage. */ diff --git a/compiler/src/modules/vm/handlers/function.rs b/compiler/src/modules/vm/handlers/function.rs index b469a30..2fe55bc 100644 --- a/compiler/src/modules/vm/handlers/function.rs +++ b/compiler/src/modules/vm/handlers/function.rs @@ -76,13 +76,13 @@ impl<'a> VM<'a> { let defaults = if n_defaults > 0 { self.pop_n(n_defaults)? } else { vec![] }; let (params, body, _, _) = self.functions[global]; - let param_names: crate::modules::fx::FxHashSet = params.iter().map(|p| s!(str p.trim_start_matches(['*', '~']), "_0")).collect(); + let param_names: crate::util::fx::FxHashSet = params.iter().map(|p| s!(str p.trim_start_matches(['*', '~']), "_0")).collect(); let mut captures: Vec<(usize, Val)> = Vec::new(); // Capture closure values once per canonical (coalesced) slot, skipping // names already bound as formal parameters. The body.names list is // typically <30, so a linear scan over chunk.names is competitive // with a HashMap and avoids a per-call monomorphization. 
- let mut seen_canonical: crate::modules::fx::FxHashSet = crate::modules::fx::FxHashSet::default(); + let mut seen_canonical: crate::util::fx::FxHashSet = crate::util::fx::FxHashSet::default(); for (bi, bname) in body.names.iter().enumerate() { if param_names.contains(bname.as_str()) { continue; } let canon = body.alias_groups.get(bi) @@ -340,12 +340,12 @@ impl<'a> VM<'a> { let callee_module = self.fn_module.get(fi).cloned().flatten(); let same_scope = caller_fi == callee_parent_fi && caller_module == callee_module; - let captured_set: crate::modules::fx::FxHashSet = if same_scope { - crate::modules::fx::FxHashSet::default() + let captured_set: crate::util::fx::FxHashSet = if same_scope { + crate::util::fx::FxHashSet::default() } else if let HeapObj::Func(_, _, captures) = self.heap.get(callee) { captures.iter().map(|(s, _)| *s).collect() } else { - crate::modules::fx::FxHashSet::default() + crate::util::fx::FxHashSet::default() }; for (si, &v) in slots.iter().enumerate() { if !v.is_undef() diff --git a/compiler/src/modules/vm/handlers/methods.rs b/compiler/src/modules/vm/handlers/methods.rs index 180a08b..022f6df 100644 --- a/compiler/src/modules/vm/handlers/methods.rs +++ b/compiler/src/modules/vm/handlers/methods.rs @@ -846,7 +846,7 @@ define_methods! { check_arity(&pos, 1, 1, "intersection takes 1 argument")?; let lhs = set_clone(vm, recv)?; let rhs_items = iter_to_vec(vm, pos[0])?; - let rhs: crate::modules::fx::FxHashSet = rhs_items.into_iter().collect(); + let rhs: crate::util::fx::FxHashSet = rhs_items.into_iter().collect(); let out: Vec = lhs.into_iter().filter(|v| rhs.contains(v)).collect(); vm.alloc_and_push_set(out) }), @@ -854,34 +854,34 @@ define_methods! 
{ check_arity(&pos, 1, 1, "difference takes 1 argument")?; let lhs = set_clone(vm, recv)?; let rhs_items = iter_to_vec(vm, pos[0])?; - let rhs: crate::modules::fx::FxHashSet = rhs_items.into_iter().collect(); + let rhs: crate::util::fx::FxHashSet = rhs_items.into_iter().collect(); let out: Vec = lhs.into_iter().filter(|v| !rhs.contains(v)).collect(); vm.alloc_and_push_set(out) }), (SetSymmetricDifference, "symmetric_difference", pure, |vm, recv, pos| { check_arity(&pos, 1, 1, "symmetric_difference takes 1 argument")?; - let lhs: crate::modules::fx::FxHashSet = set_clone(vm, recv)?.into_iter().collect(); - let rhs: crate::modules::fx::FxHashSet = iter_to_vec(vm, pos[0])?.into_iter().collect(); + let lhs: crate::util::fx::FxHashSet = set_clone(vm, recv)?.into_iter().collect(); + let rhs: crate::util::fx::FxHashSet = iter_to_vec(vm, pos[0])?.into_iter().collect(); let out: Vec = lhs.symmetric_difference(&rhs).copied().collect(); vm.alloc_and_push_set(out) }), (SetIsSubset, "issubset", pure, |vm, recv, pos| { check_arity(&pos, 1, 1, "issubset takes 1 argument")?; let lhs = set_clone(vm, recv)?; - let rhs: crate::modules::fx::FxHashSet = iter_to_vec(vm, pos[0])?.into_iter().collect(); + let rhs: crate::util::fx::FxHashSet = iter_to_vec(vm, pos[0])?.into_iter().collect(); vm.push(Val::bool(lhs.iter().all(|v| rhs.contains(v)))); Ok(()) }), (SetIsSuperset, "issuperset", pure, |vm, recv, pos| { check_arity(&pos, 1, 1, "issuperset takes 1 argument")?; - let lhs: crate::modules::fx::FxHashSet = set_clone(vm, recv)?.into_iter().collect(); + let lhs: crate::util::fx::FxHashSet = set_clone(vm, recv)?.into_iter().collect(); let rhs = iter_to_vec(vm, pos[0])?; vm.push(Val::bool(rhs.iter().all(|v| lhs.contains(v)))); Ok(()) }), (SetIsDisjoint, "isdisjoint", pure, |vm, recv, pos| { check_arity(&pos, 1, 1, "isdisjoint takes 1 argument")?; - let lhs: crate::modules::fx::FxHashSet = set_clone(vm, recv)?.into_iter().collect(); + let lhs: crate::util::fx::FxHashSet = set_clone(vm, 
recv)?.into_iter().collect(); let rhs = iter_to_vec(vm, pos[0])?; vm.push(Val::bool(!rhs.iter().any(|v| lhs.contains(v)))); Ok(()) diff --git a/compiler/src/modules/vm/handlers/methods_helpers.rs b/compiler/src/modules/vm/handlers/methods_helpers.rs index 2bc5267..7049180 100644 --- a/compiler/src/modules/vm/handlers/methods_helpers.rs +++ b/compiler/src/modules/vm/handlers/methods_helpers.rs @@ -96,7 +96,7 @@ pub(super) fn set_clone(vm: &VM, recv: Val) -> Result, VmErr> { (add, remove, discard, pop, clear, update). */ #[inline] pub(super) fn set_mut(vm: &mut VM, recv: Val, err: &'static str, f: F) -> Result -where F: FnOnce(&mut crate::modules::fx::FxHashSet) -> Result +where F: FnOnce(&mut crate::util::fx::FxHashSet) -> Result { match vm.heap.get_mut(recv) { HeapObj::Set(rc) => f(&mut rc.borrow_mut()), diff --git a/compiler/src/modules/vm/init.rs b/compiler/src/modules/vm/init.rs index a972734..4e66e02 100644 --- a/compiler/src/modules/vm/init.rs +++ b/compiler/src/modules/vm/init.rs @@ -12,8 +12,8 @@ use super::types::*; canonical slot; `seen` deduplicates so the latest value wins. */ fn collect_module_attrs(chunk: &SSAChunk, slots: &[Val]) -> Vec<(String, Val)> { let mut attrs: Vec<(String, Val)> = Vec::new(); - let mut seen: crate::modules::fx::FxHashSet = - crate::modules::fx::FxHashSet::default(); + let mut seen: crate::util::fx::FxHashSet = + crate::util::fx::FxHashSet::default(); for ins in &chunk.instructions { if !matches!(ins.opcode, OpCode::StoreName) { continue; } let Some(name) = chunk.names.get(ins.operand as usize) else { continue; }; @@ -97,8 +97,8 @@ impl<'a> VM<'a> { // user code dispatches. Topological order falls out of recursive // descent: a module's dependencies are seen + initialised before // its own top-level runs. 
- let mut in_progress: crate::modules::fx::FxHashSet = - crate::modules::fx::FxHashSet::default(); + let mut in_progress: crate::util::fx::FxHashSet = + crate::util::fx::FxHashSet::default(); self.init_modules(self.chunk, &mut in_progress)?; let mut slots = self.fill_builtins(&self.chunk.names); self.exec(self.chunk, &mut slots) @@ -114,7 +114,7 @@ impl<'a> VM<'a> { fn init_modules( &mut self, chunk: &SSAChunk, - in_progress: &mut crate::modules::fx::FxHashSet, + in_progress: &mut crate::util::fx::FxHashSet, ) -> Result<(), VmErr> { for entry in &chunk.imports { if self.module_table.contains_key(&entry.spec) { continue; } diff --git a/compiler/src/modules/vm/mod.rs b/compiler/src/modules/vm/mod.rs index 63e23c1..75bb63d 100644 --- a/compiler/src/modules/vm/mod.rs +++ b/compiler/src/modules/vm/mod.rs @@ -12,7 +12,7 @@ mod init; use crate::s; use crate::modules::parser::{SSAChunk, BUILTIN_TYPES}; -use crate::modules::fx::FxHashMap as HashMap; +use crate::util::fx::FxHashMap as HashMap; pub use types::{Val, HeapObj, HeapPool, VmErr, Limits}; @@ -230,7 +230,7 @@ impl<'a> VM<'a> { // True iff the body references names not in params/builtins/captures. 
vm.needs_caller_slots = (0..vm.functions.len()).map(|fi| { let (params, body, _, _) = vm.functions[fi]; - let param_names: crate::modules::fx::FxHashSet<&str> = params.iter() + let param_names: crate::util::fx::FxHashSet<&str> = params.iter() .map(|p| p.trim_start_matches(['*', '~'])).collect(); body.names.iter().any(|n| { let base = crate::modules::parser::ssa_strip(n); @@ -255,7 +255,7 @@ impl<'a> VM<'a> { vm.body_free_loads = (0..vm.functions.len()).map(|fi| { let (_, body, _, _) = vm.functions[fi]; let param_bm = &vm.is_param_slot[fi]; - let mut written: crate::modules::fx::FxHashSet = crate::modules::fx::FxHashSet::default(); + let mut written: crate::util::fx::FxHashSet = crate::util::fx::FxHashSet::default(); for ins in &body.instructions { if matches!(ins.opcode, crate::modules::parser::OpCode::StoreName | crate::modules::parser::OpCode::Phi) { written.insert(ins.operand as usize); diff --git a/compiler/src/modules/vm/ops.rs b/compiler/src/modules/vm/ops.rs index 8aa2141..bb9aef8 100644 --- a/compiler/src/modules/vm/ops.rs +++ b/compiler/src/modules/vm/ops.rs @@ -189,7 +189,7 @@ impl<'a> VM<'a> { if !(Val::INT_MIN..=Val::INT_MAX).contains(&i) { return b.format(i).into(); } let mut s = String::new(); s.push_str(b.format(i)); s.push_str(".0"); return s; } - return crate::modules::fstr::format_f64(f); + return crate::util::fstr::format_f64(f); } if v.is_true() { return "True".into(); } if v.is_false() { return "False".into(); } @@ -360,7 +360,7 @@ impl<'a> VM<'a> { push. Dedups via HashSet's bit-eq, consistent with how literals and method results are stored. 
*/ fn alloc_set_value(&mut self, items: Vec) -> Result { - let mut s = crate::modules::fx::FxHashSet::default(); + let mut s = crate::util::fx::FxHashSet::default(); for v in items { s.insert(v); } self.heap.alloc(HeapObj::Set(Rc::new(RefCell::new(s)))) } diff --git a/compiler/src/modules/vm/types/mod.rs b/compiler/src/modules/vm/types/mod.rs index 7553573..49d4977 100644 --- a/compiler/src/modules/vm/types/mod.rs +++ b/compiler/src/modules/vm/types/mod.rs @@ -1,6 +1,6 @@ -use alloc::{rc::Rc, string::String, vec, vec::Vec}; +use alloc::{rc::Rc, string::String, vec::Vec}; use core::cell::RefCell; -use crate::modules::fx::{FxHashMap as HashMap, FxHashSet as HashSet}; +use crate::util::fx::{FxHashMap as HashMap, FxHashSet as HashSet}; pub mod coro; pub mod eq; @@ -62,15 +62,12 @@ impl ExternFn { } /* NaN-boxed 8-byte value: int (47-bit), float, bool, None, undef, or heap idx. - Tags live in the QNAN bit pattern; payload bits decide the variant. */ -const QNAN: u64 = 0x7FFC_0000_0000_0000; -const SIGN: u64 = 0x8000_0000_0000_0000; -const TAG_UNDEF: u64 = QNAN; // payload all zero — distinct from None/True/False/Heap -const TAG_NONE: u64 = QNAN | 1; -const TAG_TRUE: u64 = QNAN | 2; -const TAG_FALSE: u64 = QNAN | 3; -const TAG_INT: u64 = QNAN | SIGN; -const TAG_HEAP: u64 = QNAN | 4; + Layout sealed in `crate::abi::nan_box`; re-imported here as the single + source of truth across the wire codec and the VM. 
*/ +use crate::abi::nan_box::{ + QNAN, SIGN, TAG_UNDEF, TAG_NONE, TAG_TRUE, TAG_FALSE, TAG_INT, TAG_HEAP, + INT_PAYLOAD_MASK, +}; #[derive(Clone, Copy, Debug)] pub struct Val(pub(crate) u64); @@ -112,7 +109,7 @@ impl Val { pub const INT_MAX: i64 = 0x0000_7FFF_FFFF_FFFF; pub const INT_MIN: i64 = -0x0000_8000_0000_0000; #[inline(always)] pub fn int(i: i64) -> Self { - Self(TAG_INT | (i as u64 & 0x0000_FFFF_FFFF_FFFF)) + Self(TAG_INT | (i as u64 & INT_PAYLOAD_MASK)) } #[inline(always)] pub fn int_checked(i: i64) -> Option { if !(Self::INT_MIN..=Self::INT_MAX).contains(&i) { None } else { Some(Self::int(i)) } @@ -139,9 +136,12 @@ impl Val { #[inline(always)] pub fn as_float(&self) -> f64 { f64::from_bits(self.0) } /* Public accessors for wire-format marshalling (FFI / WASM loader / SDK). */ #[inline(always)] pub fn raw(&self) -> u64 { self.0 } - #[inline(always)] pub fn from_raw(u: u64) -> Self { Self(u) } + /** # Safety + * `u` must come from `Val::raw()` on a live heap slot in the same VM. + * */ + #[inline(always)] pub unsafe fn from_raw(u: u64) -> Self { Self(u) } #[inline(always)] pub fn as_int(&self) -> i64 { - let raw = (self.0 & 0x0000_FFFF_FFFF_FFFF) as i64; + let raw = (self.0 & INT_PAYLOAD_MASK) as i64; (raw << 16) >> 16 } #[inline(always)] pub fn as_bool(&self) -> bool { self.0 == TAG_TRUE } @@ -354,6 +354,10 @@ pub struct HeapPool { bytes_intern: HashMap, u32>, // Cached Ellipsis slot index so `... is ...` is True (singleton parity). ellipsis_idx: Option, + /* Reused across every mark() call; capacity grows once and stays put so + GC never allocates from the same allocator it might be running short + of. Cleared, not freed, between traversals. 
*/ + mark_worklist: Vec, } impl HeapPool { @@ -368,6 +372,7 @@ impl HeapPool { strings: HashMap::default(), bytes_intern: HashMap::default(), ellipsis_idx: None, + mark_worklist: Vec::with_capacity(64), } } @@ -413,14 +418,16 @@ impl HeapPool { pub fn mark(&mut self, v: Val) { if !v.is_heap() { return; } - let mut worklist = vec![v.as_heap()]; - while let Some(idx) = worklist.pop() { + /* Split borrow: closure needs &mut mark_worklist while we read slots. */ + let HeapPool { slots, mark_worklist, .. } = self; + mark_worklist.push(v.as_heap()); + while let Some(idx) = mark_worklist.pop() { let idx = idx as usize; - if self.slots[idx].marked { continue; } - self.slots[idx].marked = true; - if let Some(obj) = &self.slots[idx].obj { + if slots[idx].marked { continue; } + slots[idx].marked = true; + if let Some(obj) = &slots[idx].obj { for_each_val(obj, |val| { - if val.is_heap() { worklist.push(val.as_heap()); } + if val.is_heap() { mark_worklist.push(val.as_heap()); } }); } } diff --git a/compiler/src/modules/fstr.rs b/compiler/src/util/fstr.rs similarity index 68% rename from compiler/src/modules/fstr.rs rename to compiler/src/util/fstr.rs index 8a5b099..8032450 100644 --- a/compiler/src/modules/fstr.rs +++ b/compiler/src/util/fstr.rs @@ -22,39 +22,15 @@ pub fn format_f64(f: f64) -> alloc::string::String { format_general(f) } -/* 32-byte stack buffer fits any f64 default format. */ +/* f64 default format fits in ~24 bytes for most values; preallocate 32 to avoid + regrowth on the common case. Using String over a stack buffer trades a tiny + allocation for safety: no silent truncation, no from_utf8_unchecked. 
*/ fn format_general(f: f64) -> alloc::string::String { - let mut buf = FmtBuf::new(); - let _ = core::fmt::write(&mut buf, core::format_args!("{}", f)); - alloc::string::String::from(buf.as_str()) -} - -struct FmtBuf { buf: [u8; 32], len: usize } -impl FmtBuf { - fn new() -> Self { Self { buf: [0u8; 32], len: 0 } } - fn as_str(&self) -> &str { - unsafe { core::str::from_utf8_unchecked(&self.buf[..self.len]) } - } -} -impl core::fmt::Write for FmtBuf { - fn write_str(&mut self, s: &str) -> core::fmt::Result { - let bytes = s.as_bytes(); - let end = (self.len + bytes.len()).min(self.buf.len()); - let n = end - self.len; - self.buf[self.len..end].copy_from_slice(&bytes[..n]); - self.len = end; - Ok(()) - } -} - -#[macro_export] -macro_rules! push { - ($s:ident, $v:literal) => { $s.push_str($v); }; - ($s:ident, str $v:expr) => { $s.push_str($v); }; - ($s:ident, int $v:expr) => {{ let mut b = itoa::Buffer::new(); $s.push_str(b.format($v)); }}; - ($s:ident, float $v:expr) => { $s.push_str(&$crate::modules::fstr::format_f64($v)); }; - ($s:ident, char $v:expr) => { $s.push($v); }; - ($s:ident, bool $v:expr) => { $s.push_str(if $v { "true" } else { "false" }); }; + use core::fmt::Write; + let mut out = alloc::string::String::with_capacity(32); + /* core::fmt::Write::write_fmt is infallible for a String. */ + let _ = write!(&mut out, "{}", f); + out } #[macro_export] @@ -63,7 +39,7 @@ macro_rules! s { (@b $s:ident; $l:literal $(, $($r:tt)*)?) => { $s.push_str($l); $($crate::s!(@b $s; $($r)*);)? }; (@b $s:ident; str $v:expr $(, $($r:tt)*)?) => { $s.push_str($v); $($crate::s!(@b $s; $($r)*);)? }; (@b $s:ident; int $v:expr $(, $($r:tt)*)?) => {{ let mut _b = itoa::Buffer::new(); $s.push_str(_b.format($v)); $($crate::s!(@b $s; $($r)*);)? }}; - (@b $s:ident; float $v:expr $(, $($r:tt)*)?) => { $s.push_str(&$crate::modules::fstr::format_f64($v)); $($crate::s!(@b $s; $($r)*);)? }; + (@b $s:ident; float $v:expr $(, $($r:tt)*)?) 
=> { $s.push_str(&$crate::util::fstr::format_f64($v)); $($crate::s!(@b $s; $($r)*);)? }; (@b $s:ident; char $v:expr $(, $($r:tt)*)?) => { $s.push($v); $($crate::s!(@b $s; $($r)*);)? }; (@b $s:ident; bool $v:expr $(, $($r:tt)*)?) => { $s.push_str(if $v { "true" } else { "false" }); $($crate::s!(@b $s; $($r)*);)? }; (cap: $c:expr; $($t:tt)*) => {{ let mut _s = alloc::string::String::with_capacity($c); $crate::s!(@b _s; $($t)*); _s }}; @@ -102,5 +78,5 @@ impl From for alloc::string::String { fn from(e: E) -> Self { e.message() } } #[macro_export] macro_rules! err { - ($($t:tt)*) => { $crate::modules::fstr::E::Custom { msg: $crate::s!($($t)*) } }; + ($($t:tt)*) => { $crate::util::fstr::E::Custom { msg: $crate::s!($($t)*) } }; } diff --git a/compiler/src/modules/fx.rs b/compiler/src/util/fx.rs similarity index 68% rename from compiler/src/modules/fx.rs rename to compiler/src/util/fx.rs index 3dc5bd8..8663b71 100644 --- a/compiler/src/modules/fx.rs +++ b/compiler/src/util/fx.rs @@ -1,10 +1,12 @@ use core::hash::{BuildHasher, Hasher}; -use core::sync::atomic::{AtomicUsize, Ordering}; /* FxHash multiplier from rustc-hash. */ const K: u64 = 0x517cc1b727220a95; -static SEED_COUNTER: AtomicUsize = AtomicUsize::new(1); +/* Fixed seed: deterministic across runs so map iteration order is reproducible. + The compiler is single-threaded in trusted contexts (host-driven WASM), so + per-instance randomisation buys nothing here and breaks golden tests. */ +const FIXED_SEED: u64 = 0x9e3779b97f4a7c15; #[derive(Clone, Default)] pub struct FxHasher(u64); @@ -30,23 +32,8 @@ impl Hasher for FxHasher { pub struct FxBuildHasher(u64); impl FxBuildHasher { - /* Atomic counter seed, avalanche-mixed to decorrelate sequential values. */ #[inline] - pub fn new() -> Self { - let raw = SEED_COUNTER.fetch_add(1, Ordering::Relaxed) as u64; - Self(murmur3_fmix64(raw)) - } -} - -/* MurmurHash3 finalizer: spreads a 1-bit difference across all 64 bits. 
*/ -#[inline] -fn murmur3_fmix64(mut h: u64) -> u64 { - h ^= h >> 33; - h = h.wrapping_mul(0xff51afd7ed558ccd); - h ^= h >> 33; - h = h.wrapping_mul(0xc4ceb9fe1a85ec53); - h ^= h >> 33; - h + pub fn new() -> Self { Self(FIXED_SEED) } } impl Default for FxBuildHasher { diff --git a/compiler/src/modules/sha256.rs b/compiler/src/util/sha256.rs similarity index 100% rename from compiler/src/modules/sha256.rs rename to compiler/src/util/sha256.rs diff --git a/compiler/tests/common.rs b/compiler/tests/common.rs index 32b99c8..23297b4 100644 --- a/compiler/tests/common.rs +++ b/compiler/tests/common.rs @@ -19,7 +19,7 @@ use std::cell::RefCell; use std::collections::{HashMap, HashSet}; use std::rc::Rc; -use compiler_lib::modules::fx::FxHashMap; +use compiler_lib::util::fx::FxHashMap; use compiler_lib::modules::packages::{ NativeBinding, Resolved, Resolver, Manifest, walk_up_dirs, dir_of, join_relative, diff --git a/documentation/implementation/design.md b/documentation/implementation/design.md index ee385f7..b4d3e4d 100644 --- a/documentation/implementation/design.md +++ b/documentation/implementation/design.md @@ -17,7 +17,7 @@ Classes are state containers, not the primary abstraction. Inheritance, descript - **Single-pass SSA codegen**: Variables are versioned per assignment (`x` -> `x_1`, `x_2`). Control-flow joins emit explicit `Phi` opcodes resolved at runtime. - **Token-threaded dispatch**: The instruction stream is `Vec` where each `Instruction` is `(opcode: OpCode, operand: u16)`. The hot loop is a flat `match` on the opcode variant. Rust lowers it to a jump table; this is *token threading*, not direct threading (computed-goto is not available in safe Rust). - **Per-instruction inline caching**: Each binary op records the type tags of its operands. After `QUICK_THRESH = 4` stable hits the IC stores a typed `FastOp` (`AddInt`, `AddFloat`, `AddStr`, `LtFloat`, `EqStr`, `ModInt`, ...) 
used as a speculative fast path with a type-guard deopt that invalidates the slot on miss. -- **Template memoisation**: Pure user functions cache `(args) -> result` after `TPL_THRESH = 2` hits, capped at 256 entries per function, gated on no-kw call and an outer scope that hasn't been observed performing impure ops (`StoreItem`, `StoreAttr`, `Raise`, `Yield`, `Global`, `Nonlocal`, `Import`, ...). Hashing uses an FNV-like fold over raw `Val.0` bits, with a value-eq verification step. +- **Template memoisation**: Pure user functions cache `(args) -> result` after `TPL_THRESH = 2` hits, capped at 256 entries per function, gated on no-kw call, an outer scope that hasn't been observed performing impure ops (`StoreItem`, `StoreAttr`, `Raise`, `Yield`, `Global`, `Nonlocal`, `Import`, ...), and on every argument being byte-stable (mutable containers — `list`, `dict`, `set`, `Instance` — disqualify the call from caching). Hashing uses an FNV-like fold over raw `Val.0` bits, with a value-eq verification step. - **NaN-boxed values**: `Val` is a 64-bit union encoding ints (47-bit signed, inline), floats (full IEEE-754 with NaNs canonicalised), bools, None, an undef sentinel, and 28-bit heap indices in a single word. - **Mark-and-sweep GC**: Triggered when `live >= gc_threshold` or `alloc_count >= max(live/4, 4096)`. After each sweep `gc_threshold = max(live * 2, 512)`. Roots include the stack, with-stack, yields, event queue, slots and live-slot snapshots, slot templates, globals, every iterator frame's `iter_stack`, opcode-cache constants, active const pools, and function templates. @@ -88,10 +88,11 @@ compiler/src/ │ ├── abi_bridge.rs # host_edge_op + dispatch_* │ ├── resolver.rs # walk-up packages.json + native bridge closure │ └── errors.rs + ├── util/ # Internal helpers shared across the compiler (not stdlib) + │ ├── fstr.rs # numeric formatter + s!/err! 
string macros + │ ├── fx.rs # FxHasher + fixed-seed FxBuildHasher (deterministic) + │ └── sha256.rs # in-tree FIPS 180-4 SHA-256 (used by integrity) └── modules/ - ├── fstr.rs # numeric formatter + s!/push!/err! string macros - ├── fx.rs # FxHasher + per-map seeded FxBuildHasher - ├── sha256.rs # in-tree FIPS 180-4 SHA-256 (used by integrity) ├── lexer/ │ ├── mod.rs │ ├── scan.rs diff --git a/documentation/implementation/lexical.md b/documentation/implementation/lexical.md index ea2bf51..00339cd 100644 --- a/documentation/implementation/lexical.md +++ b/documentation/implementation/lexical.md @@ -75,7 +75,7 @@ fr'raw fstring' # raw f-string """triple""" # triple-quoted, single or double ``` -A leading prefix is recognised before the opening quote by the identifier scanner and verified against `is_string_prefix`, `is_fstring_prefix`, or `is_bytes_prefix`. Triple-quoted strings span newlines and bump `line` for each `\n` inside. Backslash escapes are consumed at lex time but **decoded** by the parser, so escape semantics live alongside the literal type. Recognised escapes: `\n \t \r \\ \' \" \xHH \uHHHH \UHHHHHHHH` plus 1- to 3-digit octal escapes (`\012` -> `\n`, `\101` -> `A`). `\N{NAME}` Unicode-name escapes are not implemented and pass through as literal text — embedding the ~200 KB Unicode-name database is rejected as too costly for the WASM artifact. +A leading prefix is recognised before the opening quote by the identifier scanner and verified against `is_string_prefix`, `is_fstring_prefix`, or `is_bytes_prefix`. Triple-quoted strings span newlines and bump `line` for each `\n` inside. Backslash escapes are consumed at lex time but **decoded** by the parser, so escape semantics live alongside the literal type. Recognised escapes: `\n \t \r \a \b \f \v \\ \' \" \xHH \uHHHH \UHHHHHHHH` plus 1- to 3-digit octal escapes (`\012` -> `\n`, `\101` -> `A`). 
`\N{NAME}` Unicode-name escapes are not implemented and pass through as literal text — embedding the ~200 KB Unicode-name database is rejected as too costly for the WASM artifact. Lex-time errors anchor on the opening quote so the user's `^` marker points at the offender, not at end-of-line: diff --git a/documentation/reference/wasm-abi.md b/documentation/reference/wasm-abi.md index 7020118..d68925a 100644 --- a/documentation/reference/wasm-abi.md +++ b/documentation/reference/wasm-abi.md @@ -24,16 +24,23 @@ extern "C" fn (argv: *const u32, argc: u32, out: *mut u32) -> i32; Handles in `argv` are owned by the host and live for the duration of the call. Handles the guest creates via `edge_encode` or `edge_op` are owned by the guest until released — the guest must call `edge_release` on each before returning, **except** for the one written into `*out`. -## Required guest export +## Required guest exports In addition to the user functions, every guest module MUST export: ```rust #[unsafe(no_mangle)] pub extern "C" fn __edge_alloc(size: u32) -> *mut u8; + +#[unsafe(no_mangle)] +pub extern "C" fn __edge_abi_version() -> u32; ``` -The host calls this to stage `argv` arrays in the guest's linear memory before invoking each export. The reference `edge-pdk` crate emits it automatically. +`__edge_alloc` lets the host stage `argv` arrays in the guest's linear memory before invoking each export. + +`__edge_abi_version` returns the wire-format version this module targets (currently `1`). The host MUST read this symbol once at instantiation and refuse modules whose version it does not understand. Without the handshake, a host that has evolved beyond v1 would load a v1 module and decode garbage silently. + +The reference `edge-pdk` crate emits both symbols automatically (`EDGE_ABI_VERSION` is a `pub const` in the same crate). ## Host imports (6 functions) @@ -346,7 +353,7 @@ The Edge Python project distributes only this specification. 
The reference Rust - `#[plugin_fn]` proc macro that turns a typed Rust function into a wire-conformant export. - `FromValue` / `IntoValue` traits with primitive impls (`i64`, `f64`, `bool`, `String`, `&str`, `Option`, `Handle`). - `Handle` / `Value` / `Error` types wrapping handles with `Drop`-driven release. -- The required `__edge_alloc` export emitted automatically. +- The required `__edge_alloc` and `__edge_abi_version` exports emitted automatically. A typical author-side function with the macro: diff --git a/edge-pdk/Cargo.toml b/edge-pdk/Cargo.toml index 5aac071..ecf8455 100644 --- a/edge-pdk/Cargo.toml +++ b/edge-pdk/Cargo.toml @@ -11,4 +11,10 @@ crate-type = ["rlib"] doctest = false [dependencies] -macros = { path = "macros", version = "0.1.0" } +edge-pdk-macros = { path = "macros", version = "0.1.0" } + +# Re-exported only so `edge_pdk::module!()` can refer to the symbol path. +# Plugin authors who skip the macro and declare their own #[global_allocator] +# do not pay for this — Rust drops unused crates from the cdylib. +[target.'cfg(target_arch = "wasm32")'.dependencies] +lol_alloc = "0.4" diff --git a/edge-pdk/macros/Cargo.toml b/edge-pdk/macros/Cargo.toml index f15cddf..e11c2e9 100644 --- a/edge-pdk/macros/Cargo.toml +++ b/edge-pdk/macros/Cargo.toml @@ -1,10 +1,10 @@ [package] -name = "macros" +name = "edge-pdk-macros" version = "0.1.0" edition.workspace = true license.workspace = true repository.workspace = true -description = "Internal proc-macro for the `edge` crate. NOT FOR DIRECT USE — `edge` re-exports the `#[edge]` attribute. This crate exists separately because Rust requires proc-macros to live in a crate with `proc-macro = true`; nesting it at `edge/macros` makes the parent-child relationship explicit." +description = "Internal proc-macro for the `edge-pdk` crate. NOT FOR DIRECT USE — `edge-pdk` re-exports the `#[plugin_fn]` attribute. 
This crate exists separately because Rust requires proc-macros to live in a crate with `proc-macro = true`; nesting it at `edge-pdk/macros` makes the parent-child relationship explicit." [lib] proc-macro = true @@ -12,6 +12,6 @@ test = false doctest = false [dependencies] -proc-macro2 = "1" -quote = "1" -syn = { version = "2", features = ["full"] } +proc-macro2 = { workspace = true } +quote = { workspace = true } +syn = { workspace = true } diff --git a/edge-pdk/macros/src/lib.rs b/edge-pdk/macros/src/lib.rs index 02fe78f..cf201c9 100644 --- a/edge-pdk/macros/src/lib.rs +++ b/edge-pdk/macros/src/lib.rs @@ -51,7 +51,7 @@ pub fn plugin_fn(_attr: TokenStream, item: TokenStream) -> TokenStream { } FnArg::Receiver(_) => { return TokenStream::from(quote! { - compile_error!("#[edge] does not support methods (`self` parameter)"); + compile_error!("#[plugin_fn] does not support methods (`self` parameter)"); }); } } @@ -73,7 +73,7 @@ pub fn plugin_fn(_attr: TokenStream, item: TokenStream) -> TokenStream { let h = unsafe { *argv.add(#i) }; let #name: #ty = match <#ty as ::edge_pdk::FromValue>::from_handle(h) { Ok(v) => v, - Err(e) => { ::edge_pdk::__stash_error(e); return 1; } + Err(e) => { ::edge_pdk::__internals::stash_error(e); return 1; } }; } }).collect(); @@ -83,7 +83,7 @@ pub fn plugin_fn(_attr: TokenStream, item: TokenStream) -> TokenStream { quote! 
{ match #impl_name(#(#arg_names),*) { Ok(v) => v, - Err(e) => { ::edge_pdk::__stash_error(e); return 1; } + Err(e) => { ::edge_pdk::__internals::stash_error(e); return 1; } } } } else { @@ -105,7 +105,7 @@ pub fn plugin_fn(_attr: TokenStream, item: TokenStream) -> TokenStream { out: *mut u32, ) -> i32 { if (argc as usize) != #argc_expected { - ::edge_pdk::__stash_error(::edge_pdk::Error::Type( + ::edge_pdk::__internals::stash_error(::edge_pdk::Error::Type( ::alloc::format!( "{} expects {} positional args, got {}", stringify!(#user_name), #argc_expected, argc))); @@ -121,7 +121,7 @@ pub fn plugin_fn(_attr: TokenStream, item: TokenStream) -> TokenStream { unsafe { *out = h.into_raw(); } 0 } - Err(e) => { ::edge_pdk::__stash_error(e); 1 } + Err(e) => { ::edge_pdk::__internals::stash_error(e); 1 } } } }; diff --git a/edge-pdk/src/lib.rs b/edge-pdk/src/lib.rs index 16dedfb..7c1a3aa 100644 --- a/edge-pdk/src/lib.rs +++ b/edge-pdk/src/lib.rs @@ -15,7 +15,8 @@ //! `edge_throw` / `edge_take_error`. //! * `FromValue` / `IntoValue` traits with primitive impls (`i64`, //! `f64`, `bool`, `String`, `&str`, `Option`, `Handle`). -//! * The `__edge_alloc` export the host shim needs for argv staging. +//! * The `__edge_alloc` export the host shim needs for argv staging +//! (lives in the hidden `__internals` module so glob imports stay clean). //! //! Author code: //! @@ -28,16 +29,64 @@ //! } //! ``` //! -//! The `#[plugin_fn]` attribute lives in the internal `macros` +//! The `#[plugin_fn]` attribute lives in the internal `edge-pdk-macros` //! sub-crate and is re-exported from here. #![cfg_attr(not(test), no_std)] extern crate alloc; -pub use macros::plugin_fn; +pub use edge_pdk_macros::plugin_fn; -use alloc::{string::{String, ToString}, vec::Vec}; +/* Curated public surface for plugin authors. Glob-importing the whole + crate exposes #[doc(hidden)] symbols (`__edge_alloc`, `__internals`) + which are part of the macro contract, not the user API. 
The prelude + re-exports just what `#[plugin_fn]` expansion needs and what most + plugins reach for: type wrappers, the attribute, the trait pair. + Recommended: `use edge_pdk::prelude::*;`. */ +pub mod prelude { + pub use crate::{plugin_fn, Handle, Value, Error, Result, FromValue, IntoValue}; +} + +/* ---------- Plugin bootstrap ----------------------------------------- */ + +/* Re-exported under a hidden path so `module!` can name lol_alloc without + forcing the plugin author to add it to their own Cargo.toml. */ +#[cfg(target_arch = "wasm32")] +#[doc(hidden)] +pub use lol_alloc as __lol_alloc; + +/* Emits the wasm32-only boilerplate every Edge Python plugin needs: + - a #[global_allocator] backed by lol_alloc::LeakingPageAllocator + (single-threaded bump allocator that matches the host model), + - a #[panic_handler] that traps via wasm32::unreachable. + + The plugin author still writes #![no_std] / #![no_main] / extern crate + alloc; at the crate root — those are crate-level attributes the macro + cannot inject from inside an item position. + + Usage: + edge_pdk::module!(); + + On non-wasm targets (e.g. host-side unit tests for the plugin) the + macro expands to nothing so cargo test still works. */ +#[macro_export] +macro_rules! module { + () => { + #[cfg(target_arch = "wasm32")] + #[global_allocator] + static __EDGE_PDK_ALLOC: $crate::__lol_alloc::LeakingPageAllocator + = $crate::__lol_alloc::LeakingPageAllocator; + + #[cfg(target_arch = "wasm32")] + #[panic_handler] + fn __edge_pdk_panic(_: &core::panic::PanicInfo) -> ! { + core::arch::wasm32::unreachable() + } + }; +} + +use alloc::{string::String, vec::Vec}; /* ---------- Wire imports --------------------------------------------- */ @@ -68,14 +117,17 @@ unsafe extern "C" { pub fn edge_throw(kind: u32, msg_ptr: *const u8, msg_len: u32); } -/// Stash an error so the host sees it after the export returns 1. -/// Used by the `#[edge]` macro when a user function returns Err(_). 
-#[doc(hidden)] -pub fn __stash_error(e: Error) { - let kind = e.kind(); - let msg = e.message().to_string(); - unsafe { edge_throw(kind, msg.as_ptr(), msg.len() as u32); } -} +/* ---------- ABI version handshake ------------------------------------ */ + +/* Wire-format version this PDK targets. Bump on any breaking change to + op codes, value tags, codec layout, or error kinds. The host loader + reads `__edge_abi_version` and refuses to instantiate a plugin whose + version it does not understand — without this, an evolved host would + load an old plugin and decode garbage silently. */ +pub const EDGE_ABI_VERSION: u32 = 1; + +#[unsafe(no_mangle)] +pub extern "C" fn __edge_abi_version() -> u32 { EDGE_ABI_VERSION } /* ---------- Op codes & tags (must match bridge.rs spec) -------------- */ @@ -100,17 +152,34 @@ pub mod tag { pub const BYTES: u32 = 4; } -/* ---------- Allocator the host calls to stage argv buffers ----------- */ +/* ---------- Internals — macro contract surface, not user API --------- */ -/// Host-side argv stager. The shim allocates space in this module's -/// linear memory before invoking each export; the layout is -/// [u32; argc] for argv and a single u32 for `out`. We use a leak-free -/// bump scheme — every call lives entirely on the heap, so the leak is -/// reclaimed when the WASM instance is torn down. -#[unsafe(no_mangle)] -pub extern "C" fn __edge_alloc(size: u32) -> *mut u8 { - let v = alloc::vec![0u8; size as usize]; - alloc::boxed::Box::into_raw(v.into_boxed_slice()) as *mut u8 +/* Sub-module so `use edge_pdk::*;` cannot pull these into a plugin + author's namespace. The `#[plugin_fn]` expansion qualifies the path + explicitly (`::edge_pdk::__internals::stash_error`), and `__edge_alloc` + stays a no_mangle WASM export regardless of Rust module nesting. */ +#[doc(hidden)] +pub mod __internals { + use super::Error; + use alloc::string::ToString; + + /* Used by #[plugin_fn] expansion when a user fn returns Err(_). 
*/ + pub fn stash_error(e: Error) { + let kind = e.kind(); + let msg = e.message().to_string(); + unsafe { super::edge_throw(kind, msg.as_ptr(), msg.len() as u32); } + } + + /* Host-side argv stager. The shim allocates space in this module's + linear memory before invoking each export; the layout is + [u32; argc] for argv and a single u32 for `out`. Each buffer is + deliberately leaked (Box::into_raw with no matching free); the memory is + reclaimed when the WASM instance is torn down. */ + #[unsafe(no_mangle)] + pub extern "C" fn __edge_alloc(size: u32) -> *mut u8 { + let v = alloc::vec![0u8; size as usize]; + alloc::boxed::Box::into_raw(v.into_boxed_slice()) as *mut u8 + } } /* ---------- Errors --------------------------------------------------- */ @@ -456,8 +525,9 @@ impl Handle { ) }; if r != 0 { return Err(last_error()); } - let v = i64::from_handle(out)?; - unsafe { edge_release(out); } - Ok(v) + /* Wrap into a Handle so Drop releases on every exit path, including + the error return from from_handle, which the old `?` took before reaching edge_release. */ + let h = Handle::from_raw(out); + i64::from_handle(h.raw()) } } diff --git a/examples/slugify-mod/Cargo.toml b/examples/slugify-mod/Cargo.toml index 1195c52..27a3ec0 100644 --- a/examples/slugify-mod/Cargo.toml +++ b/examples/slugify-mod/Cargo.toml @@ -12,4 +12,4 @@ doctest = false [dependencies] edge-pdk = { path = "../../edge-pdk" } -lol_alloc = "0.4" +lol_alloc = { workspace = true }