
Commit 91c6bd7

feat: insert records in bulk via cached nonce (#111)
resolves: truflation/website#3705

**Summary by CodeRabbit (Release Notes)**

* **New Features**
  * Added a `BulkInserter` utility for high-throughput, pipelined bulk record insertion with fire-and-forget broadcasting and automatic chunking.
  * Supports configurable parameters (batch size, max inflight, max attempts) with built-in retry and backoff handling for transient errors.
* **Documentation**
  * Added a comprehensive API reference and practical examples demonstrating `BulkInserter` usage patterns and configuration guidance.
1 parent 41a1fbc commit 91c6bd7

10 files changed

Lines changed: 783 additions & 2 deletions


README.md

Lines changed: 58 additions & 0 deletions
@@ -117,6 +117,64 @@ for record in records:
    print(f"Date: {date.strftime('%Y-%m-%d')}, Value: {record['Value']}")
```

### High-Throughput Insertion with `BulkInserter`

When inserting more than a few hundred records from a single signer, looping
`client.batch_insert_records(...)` is dramatically slower than it needs to be:
each call forces a wait-for-inclusion (~1–2s per block) before the next
broadcast, so 1,000 records can take 25+ minutes.

`BulkInserter` instead caches the nonce locally and broadcasts each chunk
fire-and-forget, draining inflight transactions in batches via `WaitTx`. It
handles `invalid nonce` (resets cache, retries) and `mempool full` (backs off,
keeps cache) automatically.

```python
from trufnetwork_sdk_py import TNClient, BulkInserter, BulkInsertError

client = TNClient("https://gateway.testnet.truf.network", "YOUR_PRIVATE_KEY")
inserter = BulkInserter(client)

batches = [
    {
        "stream_id": "st...",
        "inputs": [
            {"date": 1700000000, "value": 1.5},
            # ... thousands more
        ],
    },
]

try:
    tx_hashes = inserter.insert_all(batches)
    print(f"broadcast {len(tx_hashes)} transactions")
except BulkInsertError as e:
    # e.tx_hashes — list of hashes broadcast before failure
    # e.failed_chunk_index — chunk that failed (or total chunks if drain_failure)
    # e.drain_failure — True if WaitTx failed after all broadcasts succeeded
    print(f"bulk insert failed: {e}; recovered {len(e.tx_hashes)} partial hashes")
```

**Constraints:**

- One `BulkInserter` per signer key — concurrent inserters from the same
  signer collide on nonces (the mempool admits transactions in strict nonce
  order).
- Different signers run safely in parallel (independent nonce sequences); see
  the sketch below.
- Records may mix stream IDs within a chunk — the inserter flattens batches
  and chunks by total record count.
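Because nonce sequences are independent per signer, the supported way to scale out is one inserter per key. A minimal sketch of that pattern, assuming one private key per worker; `keys_and_batches` is a hypothetical list of `(private_key, batches)` pairs, not an SDK construct:

```python
from concurrent.futures import ThreadPoolExecutor

from trufnetwork_sdk_py import TNClient, BulkInserter

def insert_for_signer(private_key, batches):
    # One client + one BulkInserter per signer key, so nonce caches never collide.
    client = TNClient("https://gateway.testnet.truf.network", private_key)
    return BulkInserter(client).insert_all(batches)

# keys_and_batches: one (private_key, batches) pair per signer.
with ThreadPoolExecutor() as pool:
    futures = [pool.submit(insert_for_signer, key, b) for key, b in keys_and_batches]
    all_hashes = [h for f in futures for h in f.result()]
```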
**Tunables** (defaults shown):

```python
BulkInserter(
    client,
    batch_size=10,    # records per insert_records tx; protocol cap is 10
    max_inflight=200, # broadcasts queued before forced drain via WaitTx
    max_attempts=5,   # initial + retries on transient errors
)
```

## Understanding Transaction Lifecycle

**IMPORTANT:** All transaction operations return success when transactions enter the mempool, NOT when they are executed on-chain. This async behavior can cause race conditions if not handled properly.
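A minimal sketch of the safe pattern, assuming the Python client exposes `wait_for_tx` (the same drain primitive the bulk-insert example refers to); the stream ID is a placeholder:

```python
tx_hash = client.batch_insert_records(batches)  # returns at mempool admission, not execution
client.wait_for_tx(tx_hash)                     # block until the tx is actually included
records = client.get_records("st...")           # only now is the new data guaranteed visible
```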

bindings/bindings.go

Lines changed: 88 additions & 1 deletion
```diff
@@ -212,7 +212,78 @@ func InsertRecords(client *tnclient.Client, inputs []types.InsertRecordInput) (s
 	return txHash.String(), nil
 }
 
-// NewInsertRecordInput creates a new InsertRecordInput struct
+// NewBulkInserter constructs a BulkInserter wired to the given TNClient.
+// Wraps tnclient.Client.LoadBulkInserter for gopy export to Python.
+//
+// batchSize: records per insert_records tx (must be <= protocol cap of 10).
+// maxInflight: how many broadcasts may queue before draining via WaitTx.
+// maxAttempts: max attempts per chunk (initial + retries) on transient
+// errors (invalid nonce, mempool full).
+// Pass 0 for any of these to use defaults (10, 200, 5).
+func NewBulkInserter(client *tnclient.Client, batchSize int, maxInflight int, maxAttempts int) (*contractsapi.BulkInserter, error) {
+	if client == nil {
+		return nil, fmt.Errorf("client is required")
+	}
+	var opts []contractsapi.BulkInserterOption
+	if batchSize > 0 {
+		opts = append(opts, contractsapi.WithBatchSize(batchSize))
+	}
+	if maxInflight > 0 {
+		opts = append(opts, contractsapi.WithMaxInflight(maxInflight))
+	}
+	if maxAttempts > 0 {
+		opts = append(opts, contractsapi.WithMaxAttempts(maxAttempts))
+	}
+	return client.LoadBulkInserter(opts...)
+}
+
+// BulkInsertResult is the gopy-friendly return shape for BulkInsertAll.
+//
+// It always carries TxHashes (the partial hashes broadcast so far) so that
+// Python callers can recover from a failure — gopy discards a (result, error)
+// tuple's result when the error is non-nil, hence the explicit struct.
+//
+// On success: ErrorMsg is "" and the other fields are zero.
+// On failure: ErrorMsg is the formatted error string, FailedChunkIndex is
+// either the index of the failing broadcast chunk (when DrainFailure is false)
+// or the total chunks broadcast (when DrainFailure is true).
+type BulkInsertResult struct {
+	TxHashes         []string
+	ErrorMsg         string
+	DrainFailure     bool
+	FailedChunkIndex int
+}
+
+// BulkInsertAll runs InsertAll against the given inserter and returns a
+// BulkInsertResult that always carries the partial tx hashes (for recovery)
+// alongside any error info.
+func BulkInsertAll(b *contractsapi.BulkInserter, inputs []types.InsertRecordInput) *BulkInsertResult {
+	res := &BulkInsertResult{}
+	if b == nil {
+		res.ErrorMsg = "inserter is required"
+		return res
+	}
+	hashes, err := b.InsertAll(context.Background(), inputs)
+	res.TxHashes = make([]string, len(hashes))
+	for i, h := range hashes {
+		res.TxHashes[i] = h.String()
+	}
+	if err != nil {
+		res.ErrorMsg = err.Error()
+		var bie *contractsapi.BulkInsertError
+		if errors.As(err, &bie) {
+			res.DrainFailure = bie.DrainFailure
+			res.FailedChunkIndex = bie.FailedChunkIndex
+		}
+	}
+	return res
+}
+
+// NewInsertRecordInput creates a new InsertRecordInput struct.
+//
+// Resolves the data provider via GetCurrentAccount on every call. Suitable
+// for one-off inserts; for bulk paths, prefer NewInsertRecordInputForProvider
+// to avoid the per-record account lookup.
 func NewInsertRecordInput(client *tnclient.Client, streamId string, date int, val float64) types.InsertRecordInput {
 	dataProvider, err := GetCurrentAccount(client)
 	if err != nil {
```
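On the Python side, a wrapper has to turn the `BulkInsertResult` above back into an exception. The shipped wrapper lives in `src/trufnetwork_sdk_py/bulk_inserter.py`; the sketch below is a simplified illustration of that conversion, not the wrapper's actual code:

```python
class BulkInsertError(Exception):
    """Carries partial tx hashes plus failure metadata for recovery."""
    def __init__(self, msg, tx_hashes, drain_failure, failed_chunk_index):
        super().__init__(msg)
        self.tx_hashes = list(tx_hashes)
        self.drain_failure = drain_failure
        self.failed_chunk_index = failed_chunk_index

def unwrap_bulk_insert_result(result):
    # `result` is the gopy-exported BulkInsertResult; field names mirror the Go struct.
    if result.ErrorMsg:
        raise BulkInsertError(result.ErrorMsg, result.TxHashes,
                              result.DrainFailure, result.FailedChunkIndex)
    return list(result.TxHashes)
```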
```diff
@@ -228,6 +299,22 @@ func NewInsertRecordInput(client *tnclient.Client, streamId string, date int, va
 	}
 }
 
+// NewInsertRecordInputForProvider creates an InsertRecordInput with an
+// explicit data provider, skipping the per-call GetCurrentAccount lookup.
+//
+// Use this when building many inputs for a single signer (the bulk insertion
+// path): resolve the data provider once via GetCurrentAccount, then call this
+// helper for each record. Avoids redundant account RPC roundtrips and makes
+// errors loud (the caller controls validation).
+func NewInsertRecordInputForProvider(dataProvider string, streamId string, date int, val float64) types.InsertRecordInput {
+	return types.InsertRecordInput{
+		StreamId:     streamId,
+		DataProvider: dataProvider,
+		EventTime:    date,
+		Value:        val,
+	}
+}
+
 // NewGetRecordInput creates a new GetRecordInput struct
 func NewGetRecordInput(
 	client *tnclient.Client,
```
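From Python, the intended call pattern is: resolve the provider once, then build every input with the cheap helper. A sketch against the gopy-exported functions; the `bindings` import path and the `go_client`/`series` variables are illustrative assumptions:

```python
from trufnetwork_sdk_py import bindings  # hypothetical import path for the gopy module

provider = bindings.GetCurrentAccount(go_client)  # one account lookup, reused below

inputs = [
    # No per-record RPC: the provider string is passed in explicitly.
    bindings.NewInsertRecordInputForProvider(provider, stream_id, date, value)
    for date, value in series  # series: (unix_timestamp, float) pairs
]
```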

docs/api-reference.md

Lines changed: 88 additions & 0 deletions
@@ -220,6 +220,94 @@ batches = [
tx_hash = client.batch_insert_records(batches)
```

### `BulkInserter` — high-throughput pipelined insertion

When you need to push hundreds or thousands of records from a single signer,
use `BulkInserter` instead of looping `batch_insert_records`. It caches the
nonce locally and broadcasts each chunk fire-and-forget — admission (~50ms)
becomes the rate limit instead of inclusion (~1–2s per block).

#### `BulkInserter(client, batch_size=10, max_inflight=200, max_attempts=5)`

Wraps the sdk-go `BulkInserter` (see
[`sdk-go/core/contractsapi/bulk_inserter.go`](https://github.com/trufnetwork/sdk-go/blob/main/core/contractsapi/bulk_inserter.go)).
Mirrors the cached-nonce pattern from `node/extensions/tn_attestation/extension.go`
(PR `kwilteam/node#1356`), which solves the same problem on the node side.

#### Parameters

- `client: TNClient` — must use HTTP transport (the default).
- `batch_size: int = 10` — records per `insert_records` transaction. Must be ≤ the protocol cap (currently 10).
- `max_inflight: int = 200` — broadcasts queued before draining via `WaitTx`.
- `max_attempts: int = 5` — initial attempt + retries per chunk on transient errors (`invalid nonce`, `mempool full`).

#### `inserter.insert_all(batches: List[RecordBatch]) -> List[str]`

Flattens `batches` into a single record list, chunks by `batch_size`,
broadcasts each chunk pipelined, and drains every `max_inflight` broadcasts,
plus once at the end. Returns the tx hashes in submission order.

Records may mix stream IDs within a chunk — the inserter chunks by total record
count, not by stream.
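For sizing a run, the chunk and drain counts fall out directly. A worked example with illustrative numbers (the 17,000-record figure comes from the bulk-insert example README; this is arithmetic, not SDK code):

```python
import math

num_records = 17_000  # e.g. one Truflation CPI ingestor run
batch_size = 10       # protocol cap
max_inflight = 200

chunks = math.ceil(num_records / batch_size)  # 1,700 insert_records txs
drains = math.ceil(chunks / max_inflight)     # 9 WaitTx drain rounds, final drain included
print(f"{chunks} txs, {drains} drains")
```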
253+
#### Raises
254+
255+
`BulkInsertError` — chunk failed after exhausting retries. The exception carries:
256+
257+
- `tx_hashes: List[str]` — tx hashes broadcast successfully before the failure
258+
- `drain_failure: bool``True` when all chunks broadcast but the final `WaitTx` failed
259+
- `failed_chunk_index: int` — index of the failing chunk (broadcast failure) or total chunks broadcast (drain failure)
260+
261+
Use these to recover: when `drain_failure` is `False`, resume from
262+
`records[failed_chunk_index * batch_size:]` after fixing the underlying issue.
263+
When `drain_failure` is `True`, all hashes are in `tx_hashes` — investigate
264+
inclusion separately (the broadcast itself succeeded).
265+
266+
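A recovery sketch built on those fields. Here `inserter` and `batches` are as in the example below, and `records` stands for the flattened record list in submission order; the resume step is illustrative, not an SDK helper:

```python
from trufnetwork_sdk_py import BulkInsertError

BATCH_SIZE = 10  # must match the inserter's batch_size

try:
    tx_hashes = inserter.insert_all(batches)
except BulkInsertError as e:
    if e.drain_failure:
        # Every chunk was broadcast; verify inclusion of e.tx_hashes out of band.
        unconfirmed = e.tx_hashes
    else:
        # Chunks before failed_chunk_index were admitted; retry only the tail.
        remaining = records[e.failed_chunk_index * BATCH_SIZE:]
        # ...fix the underlying issue, then re-submit `remaining` in a fresh insert_all call.
```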
#### Example

```python
from trufnetwork_sdk_py import TNClient, BulkInserter, BulkInsertError

client = TNClient("https://gateway.testnet.truf.network", "YOUR_PRIVATE_KEY")
inserter = BulkInserter(client)

batches = [
    {
        "stream_id": "st...",
        "inputs": [
            {"date": 1700000000, "value": 1.5},
            # ... thousands more
        ],
    },
]

try:
    tx_hashes = inserter.insert_all(batches)
    print(f"broadcast {len(tx_hashes)} transactions")
except BulkInsertError as e:
    print(f"bulk insert failed: {e}")
    print(f"  partial hashes: {len(e.tx_hashes)}")
    print(f"  failed at chunk: {e.failed_chunk_index}")
    print(f"  drain failure: {e.drain_failure}")
    # If not e.drain_failure, resume from records[e.failed_chunk_index * 10:]
```

#### Constraints

- **One BulkInserter per signer key.** The cache is per-instance; concurrent
  inserters from the same signer collide on nonces because the mempool admits
  transactions strictly in nonce order.
- **Sequential per signer, not concurrent.** Out-of-order HTTP arrival from
  one signer triggers `invalid nonce` rejections; the helper is single-threaded
  by design.
- **Different signers run in parallel.** Per-signer nonces are independent.

#### Working example

See [`examples/bulk_insert_example`](../examples/bulk_insert_example) for a
full lifecycle demo: connect → drop existing → deploy → bulk-insert → fetch +
verify → drop.

## Stream Querying

### `client.get_records(stream_id: str, **kwargs) -> List[Dict]`
examples/bulk_insert_example/README.md

Lines changed: 101 additions & 0 deletions

@@ -0,0 +1,101 @@
# Bulk Insert Example (Python)

Demonstrates `BulkInserter` — pipelined high-throughput record insertion that
keeps a single signer within the protocol's 10-row-per-tx cap while
broadcasting hundreds of transactions per minute.

## What it does

1. Connects to a local TN node with the dev private key
2. Generates a stream ID and best-effort drops any existing stream with that ID
3. Deploys a fresh primitive stream
4. Bulk-inserts 25 synthetic records via `BulkInserter` (3 chunks of 10/10/5)
5. Reads the records back and confirms count + values
6. Drops the test stream

## Why use BulkInserter

Calling `client.batch_insert_records(...)` in a loop forces the SDK to wait for
each transaction to be **included in a block** (~1–2s per call) before
broadcasting the next. For 1,000 records that's 25+ minutes; for the Truflation
CPI ingestor's ~17,000-record runs, it's 4–5 hours.

`BulkInserter` instead:

- Caches the nonce locally (one initial fetch, then increments)
- Broadcasts each chunk fire-and-forget (`wait=False` underneath) — admission
  takes ~50ms versus inclusion's 1–2s
- Drains inflight hashes in batches via `wait_for_tx`
- Retries automatically on `invalid nonce` (resets the cache and refetches)
  and `mempool full` (backs off, keeps the cache)

Result: 1,000 records land in roughly one minute on a typical node, instead of
half an hour.

## Prerequisites

A local node with migrations applied + the dev key whitelisted. From the
`node` repo:

```bash
task single:start                                    # spin up postgres + tn-db
PATH="$(pwd)/.build:$PATH" task action:migrate:dev   # apply migrations + grant network_writer
```

The dev key is `0000000000000000000000000000000000000000000000000000000000000001`,
which derives to address `0x7E5F4552091A69125d5DfCb7b8C2659029395Bdf`. Both
`task action:migrate:dev` and the `single:start` defaults wire this address as
the DB owner and grant it `system:network_writer`.

## Running

From the `sdk-py` repo:

```bash
source .venv/bin/activate
python examples/bulk_insert_example/main.py
```

## Expected output

```text
connected as 0x7e5f4552091a69125d5dfcb7b8c2659029395bdf
stream id: stbulkinsertxxxxxxxxxxxxxxxxxxxxx
(no existing stream to drop: ...)
stream deployed (tx 0x...)
broadcasting 25 records via BulkInserter (batch_size=10)...
done: 3 chunks broadcast + drained in 1.05s (350ms/chunk avg)

First 3 records read back:
EventTime=1704067200 Value=1.000000000000000000
EventTime=1704153600 Value=2.000000000000000000
EventTime=1704240000 Value=3.000000000000000000
...
Total verified: 25 records
dropped existing stream (tx 0x...)
```
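The core of `main.py`, reduced to the insert-and-verify steps. Stream deployment and teardown are elided, `stream_id` is a placeholder for the deployed stream, and the local node URL is a stand-in; the timestamps match the expected output above:

```python
import math

from trufnetwork_sdk_py import TNClient, BulkInserter

NUM_RECORDS = 25
DAY = 86_400
START = 1_704_067_200  # 2024-01-01 UTC, first EventTime in the expected output

DEV_PRIVATE_KEY = "00" * 31 + "01"  # the dev key from Prerequisites
client = TNClient("http://localhost:8484", DEV_PRIVATE_KEY)  # local node URL is a stand-in
inserter = BulkInserter(client)

stream_id = "st..."  # produced by the (elided) deploy step

records = [{"date": START + i * DAY, "value": float(i + 1)} for i in range(NUM_RECORDS)]
tx_hashes = inserter.insert_all([{"stream_id": stream_id, "inputs": records}])
assert len(tx_hashes) == math.ceil(NUM_RECORDS / 10)  # 3 chunks: 10 / 10 / 5

rows = client.get_records(stream_id)
assert len(rows) == NUM_RECORDS
```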
## Customizing

- **Larger workloads**: change `NUM_RECORDS` at the top of `main.py`. Chunks =
  `ceil(NUM_RECORDS / 10)`.
- **Throughput knobs**: pass kwargs to `BulkInserter`:

  ```python
  inserter = BulkInserter(
      client,
      batch_size=10,    # records per insert_records tx; protocol cap is 10
      max_inflight=500, # broadcasts queued before forced drain
      max_attempts=5,   # initial + retries on transient errors
  )
  ```

- **Testnet/mainnet**: change `TEST_PROVIDER_URL` and the private key. Note
  that the account must hold `system:network_writer` to deploy streams.

## Related

- Python wrapper source: [`src/trufnetwork_sdk_py/bulk_inserter.py`](../../src/trufnetwork_sdk_py/bulk_inserter.py)
- Underlying Go implementation: [`sdk-go/core/contractsapi/bulk_inserter.go`](https://github.com/trufnetwork/sdk-go/blob/main/core/contractsapi/bulk_inserter.go)
- Pattern reference: [`tn_attestation/extension.go`](https://github.com/trufnetwork/node/blob/main/extensions/tn_attestation/extension.go)
  in the node repo (PR #1356) — the same cached-nonce design that solved the
  attestation cron's "invalid nonce" noise.
