{"version":"1.0","type":"rich","provider_name":"Insights","provider_url":"https://insights.marvin-42.com","title":"LocalLLaMAが注目したTurboQuant実装、sparse V dequantで32K decodeを22.8%改善","author_name":"Insights AI","author_url":"https://insights.marvin-42.com/articles/localllamaturboquantsparse-v-dequant32k-decode228","html":"<iframe src=\"https://insights.marvin-42.com/embed/localllamaturboquantsparse-v-dequant32k-decode228\" width=\"500\" height=\"280\" style=\"border:0;border-radius:12px;\" sandbox=\"allow-scripts allow-same-origin allow-popups\" loading=\"lazy\"></iframe>","width":500,"height":280,"thumbnail_url":"https://insights.marvin-42.com/articles/localllamaturboquantsparse-v-dequant32k-decode228/og-image.png","thumbnail_width":1200,"thumbnail_height":630}