Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
67e7e47
hoist deps to workspace.dependencies
dylan-sutton-chavez May 9, 2026
b77fb73
move fx fstr sha256 to util module
dylan-sutton-chavez May 10, 2026
02c834f
rename macros to edge-pdk-macros
dylan-sutton-chavez May 9, 2026
04359f4
move pdk internals to hidden submodule
dylan-sutton-chavez May 10, 2026
e455a86
centralize nanbox constants in abi
dylan-sutton-chavez May 10, 2026
1aa066e
extract read_src helper
dylan-sutton-chavez May 9, 2026
6db64c8
extract native binding closure to abi_bridge
dylan-sutton-chavez May 10, 2026
a1ea050
add edge_pdk prelude module
dylan-sutton-chavez May 9, 2026
454c9aa
add module! bootstrap macro to edge-pdk
dylan-sutton-chavez May 10, 2026
9ec804e
export edge_abi_version from pdk
dylan-sutton-chavez May 10, 2026
160d56b
add wasm_free symmetric to wasm_alloc
dylan-sutton-chavez May 9, 2026
dcc7a9c
drop unused package re-export aliases
dylan-sutton-chavez May 9, 2026
3e0eea6
fix plugin_fn macro error message
dylan-sutton-chavez May 9, 2026
2a4691b
correct host_edge_encode doc comment
dylan-sutton-chavez May 9, 2026
38e1ddd
fix raw string prefix detection
dylan-sutton-chavez May 9, 2026
198b227
add bell backspace form-feed vtab escapes
dylan-sutton-chavez May 9, 2026
50cabfd
generalize integer literal overflow message
dylan-sutton-chavez May 9, 2026
dc19cb8
split fast-path outcomes for ic stability
dylan-sutton-chavez May 9, 2026
8f4a607
skip memoization for mutable args
dylan-sutton-chavez May 9, 2026
008e6c7
reuse gc mark worklist across roots
dylan-sutton-chavez May 9, 2026
2272d68
drop unreachable dispatch_generic arm
dylan-sutton-chavez May 9, 2026
674a57e
drop FmtBuf for plain string
dylan-sutton-chavez May 9, 2026
e0b3dca
drop unused push macro
dylan-sutton-chavez May 9, 2026
e4668f8
use fixed seed for fx hasher
dylan-sutton-chavez May 9, 2026
5ab64b6
mark Val from_raw unsafe
dylan-sutton-chavez May 9, 2026
c410143
remove expect and unwrap on ffi dispatch paths
dylan-sutton-chavez May 10, 2026
755b448
validate utf-8 on host input buffer
dylan-sutton-chavez May 9, 2026
3aaa0d5
release handle via raii in Handle len
dylan-sutton-chavez May 9, 2026
150447f
stash panic message before trap
dylan-sutton-chavez May 9, 2026
3b875b8
docs(vm): add safety docstring for a unsafe function to address linter.
dylan-sutton-chavez May 10, 2026
c9d2ba3
refactor(reports): remove commits reports in the actual branch.
dylan-sutton-chavez May 10, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,19 @@ edition = "2024"
license = "MIT OR Apache-2.0"
repository = "https://github.com/dylan-sutton-chavez/edge-python/"

# Single source of truth for third-party versions. Members opt in with
# `dep = { workspace = true }` so a bump touches one line workspace-wide
# and the lockfile cannot grow accidental version splits.
[workspace.dependencies]
hashbrown = { version = "0.17", default-features = false }
itoa = "1"
lol_alloc = "0.4"
proc-macro2 = "1"
quote = "1"
syn = { version = "2", features = ["full"] }
serde = { version = "1", features = ["derive"] }
serde_json = "1"

[profile.release]
opt-level = "z"
lto = true
Expand Down
12 changes: 6 additions & 6 deletions compiler/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,18 @@ name = "tests"
path = "tests/main.rs"

[dependencies]
hashbrown = { version = "0.17", default-features = false }
itoa = "1"
hashbrown = { workspace = true }
itoa = { workspace = true }

# WASM-only global allocator. Pulled into wasm32 builds automatically; absent
# on host builds (where std's allocator is fine and `compiler.wasm` is just a
# library artifact, not the runtime target).
[target.'cfg(target_arch = "wasm32")'.dependencies]
lol_alloc = "0.4"
lol_alloc = { workspace = true }

# Test-only deps for the JSON-driven test runner. None of these are pulled
# into the release `compiler.wasm`.
[dev-dependencies]
hashbrown = { version = "0.17", default-features = false, features = ["serde"] }
serde = { version = "1", features = ["derive"] }
serde_json = "1"
hashbrown = { workspace = true, features = ["serde"] }
serde = { workspace = true }
serde_json = { workspace = true }
7 changes: 4 additions & 3 deletions compiler/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,10 +90,11 @@ Mark-and-sweep with roots: operand stack, with-stack, pending yields, event queu
│ │ ├── abi_bridge.rs
│ │ ├── resolver.rs
│ │ └── errors.rs
│ ├── util
│ │ ├── fstr.rs
│ │ ├── fx.rs
│ │ └── sha256.rs
│ └── modules
│ ├── fstr.rs
│ ├── fx.rs
│ ├── sha256.rs
│ ├── lexer
│ │ ├── mod.rs
│ │ ├── scan.rs
Expand Down
45 changes: 27 additions & 18 deletions compiler/src/abi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,23 @@ Sealed contract for modules.

use alloc::{string::String, vec::Vec};

/* Source-of-truth NaN-boxing layout. Both the wire codec below and
vm::types::Val import from here, so any change touches one site
instead of three. Reserved for the `Sealed contract — v1` set: a
layout change forces a wasm-abi version bump. */
pub mod nan_box {
/* Quiet-NaN prefix. classify_decode treats any bit pattern that does not
contain all of these bits as a plain f64. */
pub const QNAN: u64 = 0x7FFC_0000_0000_0000;
/* IEEE-754 sign bit; combined with QNAN it marks an integer (see TAG_INT). */
pub const SIGN: u64 = 0x8000_0000_0000_0000;
/* Bare QNAN prefix with no low bits set. NOTE(review): presumably the
"undefined" sentinel — confirm against vm::types::Val. */
pub const TAG_UNDEF: u64 = QNAN;
/* Singleton encodings: None, True, False. */
pub const TAG_NONE: u64 = QNAN | 1;
pub const TAG_TRUE: u64 = QNAN | 2;
pub const TAG_FALSE: u64 = QNAN | 3;
/* Integer tag: QNAN and SIGN both set; the payload sits in the low bits
selected by INT_PAYLOAD_MASK. */
pub const TAG_INT: u64 = QNAN | SIGN;
/* NOTE(review): presumably the heap-handle tag — confirm how the handle
index is packed alongside it. */
pub const TAG_HEAP: u64 = QNAN | 4;
/* 48-bit signed integer payload mask (two's-complement, bits 0..=47, sign
bit at bit 47). classify_decode sign-extends the masked value with
`(raw << 16) >> 16`. */
pub const INT_PAYLOAD_MASK: u64 = 0x0000_FFFF_FFFF_FFFF;
}

/* Op codes (sealed) */

#[allow(non_camel_case_types)]
Expand Down Expand Up @@ -184,27 +201,22 @@ pub enum EncodeRequest<'a> {
Invalid,
}

// Maps (tag, bytes) to EncodeRequest. NaN-boxing layout is sealed here; changes require ABI bump.
// Maps (tag, bytes) to EncodeRequest. NaN-boxing layout is sealed in `nan_box`; changes require ABI bump.
pub fn classify_encode(tag: u32, bytes: &[u8]) -> EncodeRequest<'_> {
/* NaN-boxing constants; must match host Val impl. */
const QNAN: u64 = 0x7FFC_0000_0000_0000;
const TAG_NONE_BITS: u64 = QNAN | 1;
const TAG_TRUE_BITS: u64 = QNAN | 2;
const TAG_FALSE_BITS: u64 = QNAN | 3;
const TAG_INT_BITS: u64 = QNAN | 0x8000_0000_0000_0000;
use nan_box::*;

match Tag::from_u32(tag) {
Some(Tag::None) => EncodeRequest::Direct(TAG_NONE_BITS),
Some(Tag::None) => EncodeRequest::Direct(TAG_NONE),
Some(Tag::Bool) => {
let b = !bytes.is_empty() && bytes[0] != 0;
EncodeRequest::Direct(if b { TAG_TRUE_BITS } else { TAG_FALSE_BITS })
EncodeRequest::Direct(if b { TAG_TRUE } else { TAG_FALSE })
}
Some(Tag::Int) => {
if bytes.len() != 8 { return EncodeRequest::Invalid; }
let mut buf = [0u8; 8];
buf.copy_from_slice(bytes);
let i = i64::from_le_bytes(buf);
EncodeRequest::Direct(TAG_INT_BITS | (i as u64 & 0x0000_FFFF_FFFF_FFFF))
EncodeRequest::Direct(TAG_INT | (i as u64 & INT_PAYLOAD_MASK))
}
Some(Tag::Float) => {
if bytes.len() != 8 { return EncodeRequest::Invalid; }
Expand Down Expand Up @@ -245,10 +257,7 @@ impl PrimitiveBytes {

// Classifies Val bits into Primitive/Heap/Invalid; Heap means host must read from HeapPool.
pub fn classify_decode(val_bits: u64) -> DecodeBits {
/* Same NaN-boxing constants as classify_encode. */
const QNAN: u64 = 0x7FFC_0000_0000_0000;
const SIGN: u64 = 0x8000_0000_0000_0000;
const TAG_INT: u64 = QNAN | SIGN;
use nan_box::*;

// Float: any non-QNAN-tagged pattern.
if (val_bits & QNAN) != QNAN {
Expand All @@ -259,7 +268,7 @@ pub fn classify_decode(val_bits: u64) -> DecodeBits {
}
// Int: QNAN|SIGN with payload.
if (val_bits & (QNAN | SIGN)) == TAG_INT {
let raw = (val_bits & 0x0000_FFFF_FFFF_FFFF) as i64;
let raw = (val_bits & INT_PAYLOAD_MASK) as i64;
let sign_extended = (raw << 16) >> 16;
return DecodeBits::Primitive {
tag: Tag::Int as u32,
Expand All @@ -269,17 +278,17 @@ pub fn classify_decode(val_bits: u64) -> DecodeBits {
// Singletons and heap handles.
let lower = val_bits & 0xF;
if (val_bits & QNAN) == QNAN && (val_bits & SIGN) == 0 {
if val_bits == QNAN | 1 {
if val_bits == TAG_NONE {
return DecodeBits::Primitive {
tag: Tag::None as u32, bytes: PrimitiveBytes::None,
};
}
if val_bits == QNAN | 2 {
if val_bits == TAG_TRUE {
return DecodeBits::Primitive {
tag: Tag::Bool as u32, bytes: PrimitiveBytes::Bool(1),
};
}
if val_bits == QNAN | 3 {
if val_bits == TAG_FALSE {
return DecodeBits::Primitive {
tag: Tag::Bool as u32, bytes: PrimitiveBytes::Bool(0),
};
Expand Down
13 changes: 10 additions & 3 deletions compiler/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,19 @@ pub mod abi;
#[cfg(target_arch = "wasm32")]
pub mod main;

pub mod modules {
/* Internal helpers shared across the compiler — not Edge Python language
modules. Kept separate from `modules/` (which contains lexer/parser/vm/
packages — runtime components) so contributors don't mistake utility
code for built-in stdlib. */
pub mod util {
pub mod fx;
pub mod fstr;
pub mod sha256;
}

pub mod modules {
pub mod lexer;
pub mod vm;
pub mod parser;
pub mod packages;
pub mod fstr;
pub mod sha256;
}
68 changes: 59 additions & 9 deletions compiler/src/main/abi_bridge.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
use crate::abi::{classify_decode, classify_encode, DecodeBits, EncodeRequest, Op, PrimitiveBytes, TAG_INVALID};
use crate::abi::{classify_decode, classify_encode, DecodeBits, EncodeRequest, ErrorKind, Op, PrimitiveBytes, TAG_INVALID};
use crate::modules::vm::types::{HeapObj, Val, VmErr};
use crate::modules::vm::handlers::methods::{lookup_method, dispatch_method};
use alloc::{rc::Rc, string::{String, ToString}, vec, vec::Vec};
use crate::modules::packages::NativeBinding;
use alloc::{rc::Rc, string::{String, ToString}, sync::Arc, vec, vec::Vec};
use core::cell::RefCell;
use crate::s;

use super::{error_stash, get_val, handles, put_val, with_recv, with_vm};
use super::errors::stash_error;
use super::{error_stash, get_val, handles, host_call_native, put_val, with_recv, with_vm};
use super::errors::{error_from_kind, stash_error};

// Universal dispatch. Returns 0 + handle in `*out_handle`, or 1 + stashed error.
#[unsafe(no_mangle)]
Expand Down Expand Up @@ -47,7 +48,10 @@ fn dispatch_call(recv_h: u32, name: &str, args: Vec<Val>) -> Result<Val, VmErr>
if vm.stack.len() != stack_before + 1 {
return Err(VmErr::Runtime("edge_op call: method left no result"));
}
Ok(vm.stack.pop().unwrap())
/* The length check above guarantees a value is present; ok_or
keeps the FFI boundary panic-free if a future change drops
the invariant. */
vm.stack.pop().ok_or(VmErr::Runtime("edge_op call: stack drained mid-dispatch"))
})
}

Expand Down Expand Up @@ -124,7 +128,7 @@ fn dispatch_get_item(recv_h: u32, args: &[Val]) -> Result<Val, VmErr> {
if vm.stack.len() != stack_before + 1 {
return Err(VmErr::Runtime("edge_op get_item: get_item left no result"));
}
Ok(vm.stack.pop().unwrap())
vm.stack.pop().ok_or(VmErr::Runtime("edge_op get_item: stack drained mid-dispatch"))
})
}

Expand Down Expand Up @@ -211,7 +215,7 @@ fn dispatch_iter_next(recv_h: u32) -> Result<Val, VmErr> {
})
}

// Bootstrap decoder: writes tag to `*out_tag`, bytes to `dst[..dst_max]`.
// Bootstrap encoder: classifies (tag, bytes) into a Val handle; returns 0 on Invalid.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn host_edge_encode(tag: u32, ptr: *const u8, len: u32) -> u32 {
let bytes = if len == 0 || ptr.is_null() {
Expand Down Expand Up @@ -301,8 +305,12 @@ pub unsafe extern "C" fn host_edge_take_error(out_kind: *mut u32, dst: *mut u8,
None => return -1,
};
if len > dst_max as usize { return -(len as i32); }
// Buffer fits — drain and copy.
let (_, msg) = stash.take().expect("peek returned Some");
// Buffer fits — drain and copy. Treat a None on take() as "race lost the
// peek/take window" and surface it as no-pending-error rather than panic
// across the FFI boundary (the .expect() previously here violated
// "panics never cross FFI"; in single-threaded WASM this is unreachable
// today but we don't want a future scheduler change to weaponise it).
let Some((_, msg)) = stash.take() else { return -1; };
let bytes = msg.as_bytes();
unsafe {
*out_kind = kind;
Expand All @@ -312,3 +320,45 @@ pub unsafe extern "C" fn host_edge_take_error(out_kind: *mut u32, dst: *mut u8,
}
bytes.len() as i32
}

/* Builds a NativeBinding whose closure translates a VM CallExtern call
   into the universal wire ABI: stage args as handles, invoke
   `host_call_native(id, ...)`, drain status into Result<Val, VmErr>, and
   release every staged handle before returning — on the success path AND
   on both failure paths.

   Lives here — not in resolver.rs — because the body is pure ABI
   marshalling. The resolver only needs to ask "give me a binding for
   (name, id)" and forget about handle plumbing. `pure: false` because
   any guest call may have side effects through the host.

   Parameters:
     name — binding name stored verbatim in the returned NativeBinding.
     id   — native function id forwarded to `host_call_native`.

   The closure returns:
     Ok(val)  — the Val behind the handle the guest wrote to `out_handle`.
     Err(..)  — the stashed error (or a generic Runtime error when nothing
                was stashed) if the call reported a non-zero status, or a
                Runtime error if the returned handle does not resolve. */
pub(super) fn make_native_binding(name: String, id: u32) -> NativeBinding {
    let closure = move |_: &mut crate::modules::vm::types::HeapPool, args: &[Val]| -> Result<Val, VmErr> {
        /* 1. Register args as handles the guest will see. */
        let argv: Vec<u32> = args.iter().map(|v| put_val(*v)).collect();
        let mut out_handle: u32 = 0;

        /* 2. Call guest export through the host shim. */
        let status = unsafe {
            host_call_native(
                id,
                argv.as_ptr(), argv.len() as u32,
                &mut out_handle as *mut u32,
            )
        };

        /* 3a. Failure reported by the call: drop the staged args, then
           surface the stashed error. The fallback is built lazily so the
           common "error was stashed" case does not allocate a String. */
        if status != 0 {
            for h in &argv { handles().release(*h); }
            let (kind, msg) = error_stash().take()
                .unwrap_or_else(|| (ErrorKind::Runtime as u32, String::from("native call failed")));
            return Err(error_from_kind(kind, msg));
        }

        /* 3b. Success: read the result BEFORE releasing — argv release
           frees the slots `result` may reference if the guest returned one
           of its inputs. The lookup is kept as an Option until the args
           are released, so an unresolvable out_handle no longer leaks
           every staged argument handle via an early `?` return. */
        let looked_up = get_val(out_handle);
        for h in &argv { handles().release(*h); }
        let result = looked_up.ok_or(VmErr::Runtime("native returned invalid handle"))?;

        /* out_handle resolved, so it is a live slot we own: release it. */
        handles().release(out_handle);
        Ok(result)
    };
    NativeBinding { name, func: Arc::new(closure), pure: false }
}
40 changes: 30 additions & 10 deletions compiler/src/main/exports.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,18 @@ pub unsafe extern "C" fn wasm_alloc(size: u32) -> *mut u8 {
Box::into_raw(v.into_boxed_slice()) as *mut u8
}

/* Frees a buffer that was handed out by `wasm_alloc`. The host MUST supply
   the same `size` it originally asked for; any other length rebuilds a Box
   with the wrong layout. A null `ptr` or a `size` of zero is ignored. */
#[unsafe(no_mangle)]
pub unsafe extern "C" fn wasm_free(ptr: *mut u8, size: u32) {
    if !ptr.is_null() && size != 0 {
        /* Rebuild the fat `*mut [u8]` directly from its parts, then let the
           reconstructed Box drop to return the memory to the allocator. */
        let raw = core::ptr::slice_from_raw_parts_mut(ptr, size as usize);
        // SAFETY: caller guarantees `ptr`/`size` describe a live boxed slice
        // obtained from `wasm_alloc` that has not been freed yet.
        drop(unsafe { Box::from_raw(raw) });
    }
}

#[unsafe(no_mangle)]
pub unsafe extern "C" fn register_code_module(
spec_ptr: *const u8, spec_len: u32,
Expand Down Expand Up @@ -68,12 +80,20 @@ pub unsafe extern "C" fn reset_modules() {
error_stash().clear();
}

#[unsafe(no_mangle)]
pub unsafe extern "C" fn extract_imports(len: usize) -> usize {
/* Reads up to SZ bytes from the host-owned SRC buffer and validates UTF-8.
`len` is capped so the slice never extends past the buffer; callers decide
how to surface a UTF-8 failure (silent vs. user-facing error). */
unsafe fn read_src(len: usize) -> Result<&'static str, core::str::Utf8Error> {
let len = len.min(SZ);
let src = match core::str::from_utf8(unsafe {
let bytes = unsafe {
core::slice::from_raw_parts(core::ptr::addr_of!(SRC) as *const u8, len)
}) {
};
core::str::from_utf8(bytes)
}

#[unsafe(no_mangle)]
pub unsafe extern "C" fn extract_imports(len: usize) -> usize {
let src = match unsafe { read_src(len) } {
Ok(s) => s,
Err(_) => return unsafe { write_out("") },
};
Expand All @@ -84,10 +104,7 @@ pub unsafe extern "C" fn extract_imports(len: usize) -> usize {

#[unsafe(no_mangle)]
pub unsafe extern "C" fn run(len: usize) -> usize {
let len = len.min(SZ);
let src = match core::str::from_utf8(unsafe {
core::slice::from_raw_parts(core::ptr::addr_of!(SRC) as *const u8, len)
}) {
let src = match unsafe { read_src(len) } {
Ok(s) => s,
Err(e) => return unsafe {
write_out(&s!("input rejected: invalid utf-8 at byte ", int e.valid_up_to()))
Expand Down Expand Up @@ -116,9 +133,12 @@ pub unsafe extern "C" fn run(len: usize) -> usize {
vm.strict_input = true;
let inp_len = unsafe { INP_LEN };
if inp_len > 0 {
let inp = unsafe { core::str::from_utf8_unchecked(
/* Host-supplied buffer; validate UTF-8 to keep the FFI boundary safe.
Invalid bytes degrade to an empty input rather than UB. */
let bytes = unsafe {
core::slice::from_raw_parts(core::ptr::addr_of!(INP) as *const u8, inp_len)
)};
};
let inp = core::str::from_utf8(bytes).unwrap_or("");
vm.input_buffer = inp.split('\n').map(alloc::string::String::from).collect();
unsafe { INP_LEN = 0; }
}
Expand Down
16 changes: 15 additions & 1 deletion compiler/src/main/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,22 @@ A ~3,000-alloc perceptron run pays ~600 ms; bumping cuts it to ~50 grows.
#[global_allocator]
static A: AssumeSingleThreaded<LeakingAllocator> = unsafe { AssumeSingleThreaded::new(LeakingAllocator::new()) };

/* Best-effort panic-to-stash: the host's edge_take_error then sees a typed
message instead of an opaque WASM trap. If the format allocation itself
re-enters this handler we fall through to unreachable(); the host trap
behaviour is unchanged from the previous bare implementation. */
#[panic_handler]
fn panic(_: &core::panic::PanicInfo) -> ! { core::arch::wasm32::unreachable() }
fn panic(info: &core::panic::PanicInfo) -> ! {
let msg = alloc::format!("internal panic: {}", info.message());
unsafe {
let p = core::ptr::addr_of_mut!(ERROR_STASH);
if (*p).is_none() { *p = Some(ErrorStash::new()); }
if let Some(stash) = (*p).as_mut() {
stash.set(crate::abi::ErrorKind::Runtime as u32, msg);
}
}
core::arch::wasm32::unreachable()
}

pub(super) const SZ: usize = 1 << 20;
#[allow(non_upper_case_globals)]
Expand Down
Loading
Loading