|
1334 | 1334 | "logo": [ |
1335 | 1335 | "https://joycode.s3.cn-north-1.jdcloud-oss.com/joycodelogo.png" |
1336 | 1336 | ], |
1337 | | - "site": "https://joycode.jd.com", |
| 1337 | + "site": "https://github.com/jd-opensource/joycode-agent", |
1338 | 1338 | "folder": "20250915_JoyCode", |
1339 | 1339 | "resolved": 74.6, |
1340 | 1340 | "date": "2025-09-15", |
|
1381 | 1381 | ], |
1382 | 1382 | "warning": null |
1383 | 1383 | }, |
| 1384 | + { |
| 1385 | + "name": "Prometheus-v1.2.1 + GPT-5", |
| 1386 | + "logo": [ |
| 1387 | + "https://raw.githubusercontent.com/EuniAI/Prometheus/main/docs/static/images/icon.jpg", |
| 1388 | + "https://raw.githubusercontent.com/EuniAI/Prometheus/main/docs/static/images/delysium.jpg" |
| 1389 | + ], |
| 1390 | + "site": "https://euni.ai/", |
| 1391 | + "folder": "20251015_Prometheus_v1.2.1_gpt5", |
| 1392 | + "resolved": 74.4, |
| 1393 | + "date": "2025-10-15", |
| 1394 | + "logs": "s3://swe-bench-experiments/verified/20251015_Prometheus_v1.2.1_gpt5/logs", |
| 1395 | + "trajs": "s3://swe-bench-experiments/verified/20251015_Prometheus_v1.2.1_gpt5/trajs", |
| 1396 | + "trajs_docent": false, |
| 1397 | + "cost": null, |
| 1398 | + "instance_cost": null, |
| 1399 | + "instance_calls": null, |
| 1400 | + "os_model": false, |
| 1401 | + "os_system": true, |
| 1402 | + "checked": false, |
| 1403 | + "tags": [ |
| 1404 | + "Model: gpt-5-2025-08-07", |
| 1405 | + "Org: EuniAI", |
| 1406 | + "Org: Delysium", |
| 1407 | + "System: Attempts - 2+" |
| 1408 | + ], |
| 1409 | + "warning": null |
| 1410 | + }, |
1384 | 1411 | { |
1385 | 1412 | "name": "Tools + Claude 4 Opus (2025-05-22)", |
1386 | 1413 | "logo": [ |
|
1405 | 1432 | ], |
1406 | 1433 | "warning": null |
1407 | 1434 | }, |
| 1435 | + { |
| 1436 | + "name": "Salesforce AI Research SAGE (bash-only)", |
| 1437 | + "logo": [ |
| 1438 | + "https://avatars.githubusercontent.com/u/137096229" |
| 1439 | + ], |
| 1440 | + "site": "https://www.salesforce.com/blog/sage-swe/", |
| 1441 | + "folder": "20251021_SalesforceAIResearch_SAGE_bash_only", |
| 1442 | + "resolved": 73.0, |
| 1443 | + "date": "2025-10-21", |
| 1444 | + "logs": "s3://swe-bench-experiments/verified/20251021_SalesforceAIResearch_SAGE_bash_only/logs", |
| 1445 | + "trajs": "s3://swe-bench-experiments/verified/20251021_SalesforceAIResearch_SAGE_bash_only/trajs", |
| 1446 | + "trajs_docent": false, |
| 1447 | + "cost": null, |
| 1448 | + "instance_cost": null, |
| 1449 | + "instance_calls": null, |
| 1450 | + "os_model": false, |
| 1451 | + "os_system": false, |
| 1452 | + "checked": false, |
| 1453 | + "tags": [ |
| 1454 | + "Model: claude-sonnet-4.5", |
| 1455 | + "Model: gpt-5", |
| 1456 | + "Org: Salesforce AI Research", |
| 1457 | + "System: Attempts - 2+" |
| 1458 | + ], |
| 1459 | + "warning": null |
| 1460 | + }, |
1408 | 1461 | { |
1409 | 1462 | "name": "Tools + Claude 4 Sonnet (2025-05-22)", |
1410 | 1463 | "logo": [ |
|
1471 | 1524 | "instance_cost": null, |
1472 | 1525 | "instance_calls": null, |
1473 | 1526 | "os_model": false, |
1474 | | - "os_system": false, |
| 1527 | + "os_system": true, |
1475 | 1528 | "checked": false, |
1476 | 1529 | "tags": [ |
1477 | 1530 | "Model: gpt-5-2025-08-07", |
|
1532 | 1585 | ], |
1533 | 1586 | "warning": null |
1534 | 1587 | }, |
| 1588 | + { |
| 1589 | + "name": "Lingxi v1.5 x Kimi K2", |
| 1590 | + "logo": [ |
| 1591 | + "https://upload.wikimedia.org/wikipedia/en/thumb/0/04/Huawei_Standard_logo.svg/1200px-Huawei_Standard_logo.svg.png" |
| 1592 | + ], |
| 1593 | + "site": "https://github.com/lingxi-agent/Lingxi/tree/master", |
| 1594 | + "folder": "20251014_Lingxi_kimi_k2", |
| 1595 | + "resolved": 71.2, |
| 1596 | + "date": "2025-10-14", |
| 1597 | + "logs": "s3://swe-bench-experiments/verified/20251014_Lingxi_kimi_k2/logs", |
| 1598 | + "trajs": "s3://swe-bench-experiments/verified/20251014_Lingxi_kimi_k2/trajs", |
| 1599 | + "trajs_docent": false, |
| 1600 | + "cost": null, |
| 1601 | + "instance_cost": null, |
| 1602 | + "instance_calls": null, |
| 1603 | + "os_model": true, |
| 1604 | + "os_system": true, |
| 1605 | + "checked": false, |
| 1606 | + "tags": [ |
| 1607 | + "Model: kimi-k2-0905-preview", |
| 1608 | + "Org: Huawei", |
| 1609 | + "System: Attempts - 1" |
| 1610 | + ], |
| 1611 | + "warning": null |
| 1612 | + }, |
1535 | 1613 | { |
1536 | 1614 | "name": "Warp", |
1537 | 1615 | "logo": [ |
|
2718 | 2796 | "warning": null, |
2719 | 2797 | "mini-swe-agent_version": "1.9.1" |
2720 | 2798 | }, |
| 2799 | + { |
| 2800 | + "name": "FrogBoss-32B-2510", |
| 2801 | + "logo": [ |
| 2802 | + "https://upload.wikimedia.org/wikipedia/commons/4/44/Microsoft_logo.svg" |
| 2803 | + ], |
| 2804 | + "site": "https://microsoft.github.io/debug-gym/blog/2025/10/bug-pilot/", |
| 2805 | + "folder": "20251110_frogboss-32b", |
| 2806 | + "resolved": 53.6, |
| 2807 | + "date": "2025-11-10", |
| 2808 | + "logs": "s3://swe-bench-experiments/verified/20251110_frogboss-32b/logs", |
| 2809 | + "trajs": "s3://swe-bench-experiments/verified/20251110_frogboss-32b/trajs", |
| 2810 | + "trajs_docent": false, |
| 2811 | + "cost": null, |
| 2812 | + "instance_cost": null, |
| 2813 | + "instance_calls": null, |
| 2814 | + "os_model": false, |
| 2815 | + "os_system": true, |
| 2816 | + "checked": false, |
| 2817 | + "tags": [ |
| 2818 | + "Model: FrogBoss-32B-2510", |
| 2819 | + "Org: Microsoft", |
| 2820 | + "System: Attempts - 1" |
| 2821 | + ], |
| 2822 | + "warning": null |
| 2823 | + }, |
2721 | 2824 | { |
2722 | 2825 | "name": "mini-SWE-agent + Gemini 2.5 Pro (2025-05-06)", |
2723 | 2826 | "logo": [ |
|
3312 | 3415 | ], |
3313 | 3416 | "warning": null |
3314 | 3417 | }, |
| 3418 | + { |
| 3419 | + "name": "FrogMini-14B-2510", |
| 3420 | + "logo": [ |
| 3421 | + "https://upload.wikimedia.org/wikipedia/commons/4/44/Microsoft_logo.svg" |
| 3422 | + ], |
| 3423 | + "site": "https://microsoft.github.io/debug-gym/blog/2025/10/bug-pilot/", |
| 3424 | + "folder": "20251110_frogmini-14b", |
| 3425 | + "resolved": 45.0, |
| 3426 | + "date": "2025-11-10", |
| 3427 | + "logs": "s3://swe-bench-experiments/verified/20251110_frogmini-14b/logs", |
| 3428 | + "trajs": "s3://swe-bench-experiments/verified/20251110_frogmini-14b/trajs", |
| 3429 | + "trajs_docent": false, |
| 3430 | + "cost": null, |
| 3431 | + "instance_cost": null, |
| 3432 | + "instance_calls": null, |
| 3433 | + "os_model": false, |
| 3434 | + "os_system": true, |
| 3435 | + "checked": false, |
| 3436 | + "tags": [ |
| 3437 | + "Model: FrogMini-14B-2510", |
| 3438 | + "Org: Microsoft", |
| 3439 | + "System: Attempts - 1" |
| 3440 | + ], |
| 3441 | + "warning": null |
| 3442 | + }, |
3315 | 3443 | { |
3316 | 3444 | "name": "mini-SWE-agent + o4-mini (2025-04-16)", |
3317 | 3445 | "logo": [ |
|
0 commit comments