Event JSON
{
"id": "3fc2a57354d8322b041d8fa652a10503b7dbbe8374a9be9bf4f5e59ac3824e83",
"pubkey": "83b593387827276eceda8bb6f5087ad2a751bf379ae9c56e8abbfa407994e285",
"created_at": 1718910682,
"kind": 1,
"tags": [
[
"d",
"bf96b6ba-1e90-459e-a29e-8847bcb4a9cd"
],
[
"subject",
"Sierra’s new benchmark reveals how well AI agents perform at real work"
],
[
"r",
"https://venturebeat.com/ai/sierras-new-benchmark-reveals-how-well-ai-agents-perform-at-real-work/"
],
[
"snaid",
"4cb664d7-5554-479a-97a5-e98af94daa84"
],
[
"published_at",
"1718906946"
],
[
"p",
"83b593387827276eceda8bb6f5087ad2a751bf379ae9c56e8abbfa407994e285"
],
[
"imeta",
"url https://venturebeat.com/wp-content/uploads/2024/06/adobe-firefly-ai-agent-complex-conversation-phone.jpg?w=1024?w=1200\u0026strip=all",
"alt Article image"
],
[
"p",
"75b92543e587fa317d73b3eaf968ea605dc70b4e513e149e4f455dea39819a5b"
]
],
"content": "Sierra’s new benchmark reveals how well AI agents perform at real work\nhttps://venturebeat.com/wp-content/uploads/2024/06/adobe-firefly-ai-agent-complex-conversation-phone.jpg?w=1024?w=1200\u0026strip=all\n\nSierra releases TAU-bench, a new benchmark that claims to more accurately evaluate AI agent performance in the real world. Read how 12 popular LLMs fared.\n\nhttps://venturebeat.com/ai/sierras-new-benchmark-reveals-how-well-ai-agents-perform-at-real-work/",
"sig": "d5ce97ae4b1e4afb52cfbc2e37c22f7cd29b7530548559f2ca076a56c5458b996fd9d9acbbe976317995f03b75ef7d0e214e2363bcc2582bea76d820fdb5cebf"
}