Event JSON
{
"id": "97ae4bdf187f6b556ba3ba1b1323b984eb69c9cd21078cd8c04a1199793966e6",
"pubkey": "83b593387827276eceda8bb6f5087ad2a751bf379ae9c56e8abbfa407994e285",
"created_at": 1718910674,
"kind": 1,
"tags": [
[
"d",
"d05c6c84-87a3-46c5-b01f-cd69d6fa4524"
],
[
"subject",
"Sierra’s new benchmark reveals how well AI agents perform at real work"
],
[
"r",
"https://venturebeat.com/ai/sierras-new-benchmark-reveals-how-well-ai-agents-perform-at-real-work/"
],
[
"snaid",
"4cb664d7-5554-479a-97a5-e98af94daa84"
],
[
"published_at",
"1718906946"
],
[
"p",
"83b593387827276eceda8bb6f5087ad2a751bf379ae9c56e8abbfa407994e285"
],
[
"imeta",
"url https://venturebeat.com/wp-content/uploads/2024/06/adobe-firefly-ai-agent-complex-conversation-phone.jpg?w=1024?w=1200\u0026strip=all",
"alt Article image"
],
[
"p",
"75b92543e587fa317d73b3eaf968ea605dc70b4e513e149e4f455dea39819a5b"
]
],
"content": "Sierra’s new benchmark reveals how well AI agents perform at real work\nhttps://venturebeat.com/wp-content/uploads/2024/06/adobe-firefly-ai-agent-complex-conversation-phone.jpg?w=1024?w=1200\u0026strip=all\n\nSierra releases TAU-bench, a new benchmark that claims to more accurately evaluate AI agent performance in the real world. Read how 12 popular LLMs fared.\n\nhttps://venturebeat.com/ai/sierras-new-benchmark-reveals-how-well-ai-agents-perform-at-real-work/",
"sig": "72d9b9c3c2d1b43f5003d7fb74a2d801496a33d54e59209612dc18337a6880cb3b5a31aee35c1288b462ef810243c694b5699f50667ae1e3306078111075cb41"
}