Event JSON
{
"id": "6a949d6ee19d9a83eba55b2ac25bdb3e96766b0a3e8f305b4c18b094ea35228d",
"pubkey": "83b593387827276eceda8bb6f5087ad2a751bf379ae9c56e8abbfa407994e285",
"created_at": 1718910654,
"kind": 1,
"tags": [
[
"d",
"2cc94b25-aec6-4275-9fa8-012730ca5b8f"
],
[
"subject",
"Sierra’s new benchmark reveals how well AI agents perform at real work"
],
[
"r",
"https://venturebeat.com/ai/sierras-new-benchmark-reveals-how-well-ai-agents-perform-at-real-work/"
],
[
"snaid",
"4cb664d7-5554-479a-97a5-e98af94daa84"
],
[
"published_at",
"1718906946"
],
[
"p",
"83b593387827276eceda8bb6f5087ad2a751bf379ae9c56e8abbfa407994e285"
],
[
"imeta",
"url https://venturebeat.com/wp-content/uploads/2024/06/adobe-firefly-ai-agent-complex-conversation-phone.jpg?w=1024?w=1200\u0026strip=all",
"alt Article image"
],
[
"p",
"75b92543e587fa317d73b3eaf968ea605dc70b4e513e149e4f455dea39819a5b"
]
],
"content": "Sierra’s new benchmark reveals how well AI agents perform at real work\nhttps://venturebeat.com/wp-content/uploads/2024/06/adobe-firefly-ai-agent-complex-conversation-phone.jpg?w=1024?w=1200\u0026strip=all\n\nSierra releases TAU-bench, a new benchmark that claims to more accurately evaluate AI agent performance in the real world. Read how 12 popular LLMs fared.\n\nhttps://venturebeat.com/ai/sierras-new-benchmark-reveals-how-well-ai-agents-perform-at-real-work/",
"sig": "0b4d165223d2dba57ae8909e5e0b41ee1ab3c1ec3c8c26f7c84b0c3ce775844374a055ec98b2335f84b2739a9c3bcc52a60084df7b375e04e527b0b65457b8cb"
}