Event JSON
{
"id": "f49b2a8a3fa27499d479aa1bc0bf0ad7cb4986cf2e82bcfd07e632470ec101b6",
"pubkey": "83b593387827276eceda8bb6f5087ad2a751bf379ae9c56e8abbfa407994e285",
"created_at": 1718910693,
"kind": 1,
"tags": [
[
"d",
"a21fc1dd-c45b-4207-a3d7-f75c9a75c674"
],
[
"subject",
"Sierra’s new benchmark reveals how well AI agents perform at real work"
],
[
"r",
"https://venturebeat.com/ai/sierras-new-benchmark-reveals-how-well-ai-agents-perform-at-real-work/"
],
[
"snaid",
"4cb664d7-5554-479a-97a5-e98af94daa84"
],
[
"published_at",
"1718906946"
],
[
"p",
"83b593387827276eceda8bb6f5087ad2a751bf379ae9c56e8abbfa407994e285"
],
[
"imeta",
"url https://venturebeat.com/wp-content/uploads/2024/06/adobe-firefly-ai-agent-complex-conversation-phone.jpg?w=1024?w=1200\u0026strip=all",
"alt Article image"
],
[
"p",
"75b92543e587fa317d73b3eaf968ea605dc70b4e513e149e4f455dea39819a5b"
]
],
"content": "Sierra’s new benchmark reveals how well AI agents perform at real work\nhttps://venturebeat.com/wp-content/uploads/2024/06/adobe-firefly-ai-agent-complex-conversation-phone.jpg?w=1024?w=1200\u0026strip=all\n\nSierra releases TAU-bench, a new benchmark that claims to more accurately evaluate AI agent performance in the real world. Read how 12 popular LLMs fared.\n\nhttps://venturebeat.com/ai/sierras-new-benchmark-reveals-how-well-ai-agents-perform-at-real-work/",
"sig": "d181c7450915b78a2f53b5c9bddd658c6e7b6323a77f56d7c95b5e6cc87287f43815a641bc5d3256e312ce9a711c57728c62ee32789bfdbe6ec5385519a34651"
}