Skip to content

Commit 0f15ee0

Browse files
committed
Python: Fix file_search citations breaking assistant history roundtrip
The Responses API rejects 'input_file' inside an assistant message, but the SDK was emitting it whenever an assistant Message contained hosted_file content (which is what file_search citations become). Three coordinated fixes:

1. _prepare_content_for_openai now skips hosted_file for the assistant role instead of mapping it to input_file (which the API rejects there).
2. The streaming response.output_text.annotation.added handler attaches file_citation, container_file_citation, and file_path as annotations on text content, matching the non-streaming path. Previously, streaming produced standalone HostedFileContent items that always tripped (1).
3. output_text serialization preserves Annotation objects on roundtrip via a new _annotations_to_output_text helper instead of hardcoding 'annotations' to []. file_search citations now survive multi-agent forwarding.

Closes #5556.
1 parent 866a325 commit 0f15ee0

2 files changed

Lines changed: 307 additions & 49 deletions

File tree

python/packages/openai/agent_framework_openai/_chat_client.py

Lines changed: 120 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,72 @@ class OpenAIChatOptions(ChatOptions[ResponseFormatT], Generic[ResponseFormatT],
241241
# endregion
242242

243243

244+
# region Helpers
245+
246+
247+
def _annotations_to_output_text(annotations: Sequence[Annotation] | None) -> list[dict[str, Any]]:
248+
"""Convert framework `Annotation` objects to Responses API `output_text` annotation dicts.
249+
250+
Citations from `file_search`, `code_interpreter` file paths, and url citations all collapse
251+
to `Annotation(type="citation", ...)` in the framework. The original API form is recovered
252+
here so assistant messages roundtrip cleanly through history forwarding.
253+
"""
254+
if not annotations:
255+
return []
256+
out: list[dict[str, Any]] = []
257+
for annotation in annotations:
258+
if annotation.get("type") != "citation":
259+
continue
260+
props = annotation.get("additional_properties") or {}
261+
regions = annotation.get("annotated_regions") or []
262+
first_region = regions[0] if regions else None
263+
file_id = annotation.get("file_id")
264+
url = annotation.get("url")
265+
title = annotation.get("title")
266+
container_id = props.get("container_id")
267+
268+
if container_id and file_id and first_region is not None:
269+
entry: dict[str, Any] = {
270+
"type": "container_file_citation",
271+
"container_id": container_id,
272+
"file_id": file_id,
273+
"start_index": first_region.get("start_index"),
274+
"end_index": first_region.get("end_index"),
275+
}
276+
if url:
277+
entry["filename"] = url
278+
out.append(entry)
279+
elif first_region is not None and url and not file_id:
280+
out.append({
281+
"type": "url_citation",
282+
"url": url,
283+
"title": title or "",
284+
"start_index": first_region.get("start_index"),
285+
"end_index": first_region.get("end_index"),
286+
})
287+
elif file_id and url:
288+
entry = {
289+
"type": "file_citation",
290+
"file_id": file_id,
291+
"filename": url,
292+
}
293+
if (idx := props.get("index")) is not None:
294+
entry["index"] = idx
295+
out.append(entry)
296+
elif file_id:
297+
entry = {
298+
"type": "file_path",
299+
"file_id": file_id,
300+
}
301+
if (idx := props.get("index")) is not None:
302+
entry["index"] = idx
303+
out.append(entry)
304+
return out
305+
306+
307+
# endregion
308+
309+
244310
# region ResponsesClient
245311

246312

@@ -1374,7 +1440,7 @@ def _prepare_content_for_openai(
13741440
return {
13751441
"type": "output_text",
13761442
"text": content.text,
1377-
"annotations": [],
1443+
"annotations": _annotations_to_output_text(getattr(content, "annotations", None)),
13781444
}
13791445
return {
13801446
"type": "input_text",
@@ -1522,6 +1588,13 @@ def _prepare_content_for_openai(
15221588
"approve": content.approved,
15231589
}
15241590
case "hosted_file":
1591+
# `input_file` is an input-only content type in the Responses API and is rejected
1592+
# inside an assistant message. Hosted-file content on an assistant message
1593+
# represents a citation produced by a hosted tool (e.g., file_search) and cannot be
1594+
# meaningfully replayed as input — drop it. The accompanying text annotations carry
1595+
# the citation context for round-tripping.
1596+
if role == "assistant":
1597+
return {}
15251598
return {
15261599
"type": "input_file",
15271600
"file_id": content.file_id,
@@ -2502,45 +2575,63 @@ def _get_ann_value(key: str) -> Any:
25022575

25032576
ann_type = _get_ann_value("type")
25042577
ann_file_id = _get_ann_value("file_id")
2578+
# Hosted-file citations attach as text annotations (matching the non-streaming path)
2579+
# so they don't roundtrip as standalone `input_file` items in assistant history.
25052580
if ann_type == "file_path":
25062581
if ann_file_id:
2582+
annotation_obj = Annotation(
2583+
type="citation",
2584+
file_id=str(ann_file_id),
2585+
additional_properties={
2586+
"annotation_index": event.annotation_index,
2587+
"index": _get_ann_value("index"),
2588+
},
2589+
raw_representation=annotation,
2590+
)
25072591
contents.append(
2508-
Content.from_hosted_file(
2509-
file_id=str(ann_file_id),
2510-
additional_properties={
2511-
"annotation_index": event.annotation_index,
2512-
"index": _get_ann_value("index"),
2513-
},
2514-
raw_representation=event,
2515-
)
2592+
Content.from_text(text="", annotations=[annotation_obj], raw_representation=event)
25162593
)
25172594
elif ann_type == "file_citation":
25182595
if ann_file_id:
2596+
ann_filename = _get_ann_value("filename")
2597+
annotation_obj = Annotation(
2598+
type="citation",
2599+
file_id=str(ann_file_id),
2600+
url=ann_filename,
2601+
additional_properties={
2602+
"annotation_index": event.annotation_index,
2603+
"index": _get_ann_value("index"),
2604+
},
2605+
raw_representation=annotation,
2606+
)
25192607
contents.append(
2520-
Content.from_hosted_file(
2521-
file_id=str(ann_file_id),
2522-
additional_properties={
2523-
"annotation_index": event.annotation_index,
2524-
"filename": _get_ann_value("filename"),
2525-
"index": _get_ann_value("index"),
2526-
},
2527-
raw_representation=event,
2528-
)
2608+
Content.from_text(text="", annotations=[annotation_obj], raw_representation=event)
25292609
)
25302610
elif ann_type == "container_file_citation":
25312611
if ann_file_id:
2612+
ann_filename = _get_ann_value("filename")
2613+
ann_start = _get_ann_value("start_index")
2614+
ann_end = _get_ann_value("end_index")
2615+
annotation_obj = Annotation(
2616+
type="citation",
2617+
file_id=str(ann_file_id),
2618+
url=ann_filename,
2619+
additional_properties={
2620+
"annotation_index": event.annotation_index,
2621+
"container_id": _get_ann_value("container_id"),
2622+
},
2623+
raw_representation=annotation,
2624+
)
2625+
if ann_start is not None and ann_end is not None:
2626+
annotation_obj["annotated_regions"] = [
2627+
TextSpanRegion(
2628+
type="text_span",
2629+
start_index=ann_start,
2630+
end_index=ann_end,
2631+
)
2632+
]
25322633
contents.append(
2533-
Content.from_hosted_file(
2534-
file_id=str(ann_file_id),
2535-
additional_properties={
2536-
"annotation_index": event.annotation_index,
2537-
"container_id": _get_ann_value("container_id"),
2538-
"filename": _get_ann_value("filename"),
2539-
"start_index": _get_ann_value("start_index"),
2540-
"end_index": _get_ann_value("end_index"),
2541-
},
2542-
raw_representation=event,
2543-
)
2634+
Content.from_text(text="", annotations=[annotation_obj], raw_representation=event)
25442635
)
25452636
elif ann_type == "url_citation":
25462637
ann_url = _get_ann_value("url")

0 commit comments

Comments
 (0)