{"version":"1.0","type":"rich","provider_name":"Insights","provider_url":"https://insights.marvin-42.com","title":"Cohere W4A8、vLLM Hopperでfirst-token latency 58%短縮を主張","author_name":"Insights AI","author_url":"https://insights.marvin-42.com/articles/cohere-w4a8vllm-hopperfirst-token-latency-58","html":"<iframe src=\"https://insights.marvin-42.com/embed/cohere-w4a8vllm-hopperfirst-token-latency-58\" width=\"500\" height=\"280\" style=\"border:0;border-radius:12px;\" sandbox=\"allow-scripts allow-same-origin allow-popups\" loading=\"lazy\"></iframe>","width":500,"height":280,"thumbnail_url":"https://insights.marvin-42.com/articles/cohere-w4a8vllm-hopperfirst-token-latency-58/og-image.png","thumbnail_width":1200,"thumbnail_height":630}