<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<url>
<loc>https://blog.doubleword.ai</loc>
<lastmod>2026-03-26T15:26:39.684Z</lastmod>
<changefreq>weekly</changefreq>
<priority>1</priority>
</url>
<url>
<loc>https://blog.doubleword.ai/price-reduction-for-qwen3-235b-on-doubleword</loc>
<lastmod>2026-01-22T16:36:49.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://blog.doubleword.ai/behind-the-stack-ep-11-how-speculative-decoding-speeds-up-language-models</loc>
<lastmod>2026-01-16T09:14:57.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://blog.doubleword.ai/chasing-cheap-tokens-2x-cheaper-tokens-than-h100s-with-consumer-cards</loc>
<lastmod>2026-01-16T12:53:12.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://blog.doubleword.ai/bst-expensive-comparisons</loc>
<lastmod>2026-01-13T09:13:55.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://blog.doubleword.ai/behind-the-stack-ep-12-understanding-model-parallelism</loc>
<lastmod>2026-01-16T09:14:57.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://blog.doubleword.ai/queue-speculation-drafting-while-you-wait</loc>
<lastmod>2026-02-02T15:37:25.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://blog.doubleword.ai/1-for-a-year-of-research-digests</loc>
<lastmod>2026-01-16T12:47:48.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://blog.doubleword.ai/behind-the-stack-ep-13-faster-inference-speculative-decoding-for-batched-workloads</loc>
<lastmod>2026-01-16T09:14:57.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://blog.doubleword.ai/should-gpus-make-free-trade-agreements</loc>
<lastmod>2026-01-16T09:14:57.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://blog.doubleword.ai/behind-the-stack-ep-10-batched-endpoints</loc>
<lastmod>2026-01-16T12:53:12.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://blog.doubleword.ai/behind-the-stack-ep-3-how-to-serve-100-models-on-a-single-gpu-with-no-cold-starts</loc>
<lastmod>2025-12-16T08:56:04.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://blog.doubleword.ai/choosing-the-right-model-for-the-use-case</loc>
<lastmod>2025-12-16T08:58:45.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://blog.doubleword.ai/behind-the-stack-ep-5-making-rag-work-for-multimodal-documents</loc>
<lastmod>2025-12-16T08:58:45.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://blog.doubleword.ai/scaling-curation-llm-comparisons</loc>
<lastmod>2026-01-16T17:01:48.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://blog.doubleword.ai/parallel-primitives</loc>
<lastmod>2026-01-10T15:24:48.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://blog.doubleword.ai/ocr-and-the-bitter-lesson</loc>
<lastmod>2026-03-23T12:22:32.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://blog.doubleword.ai/why-batch-inference-matters</loc>
<lastmod>2025-12-17T12:09:37.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://blog.doubleword.ai/arxiv-llm-search</loc>
<lastmod>2026-01-10T15:24:48.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://blog.doubleword.ai/zerodp-jit-weight-offloading-nvlink</loc>
<lastmod>2026-01-13T15:55:02.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://blog.doubleword.ai/benchmarking-doubleword-control-layer</loc>
<lastmod>2025-12-17T12:10:36.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://blog.doubleword.ai/behind-the-stack-ep-6-how-to-speed-up-the-inference-of-ai-agents</loc>
<lastmod>2025-12-17T12:11:14.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://blog.doubleword.ai/behind-the-stack-ep-7-choosing-the-right-quantization-for-self-hosted-llms</loc>
<lastmod>2025-12-17T12:11:07.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://blog.doubleword.ai/behind-the-stack-ep-8-choosing-the-right-inference-engine-for-your-llm-deployment</loc>
<lastmod>2025-12-17T12:11:01.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://blog.doubleword.ai/behind-the-stack-ep-1-what-should-i-be-observing-in-my-llm-stack</loc>
<lastmod>2025-12-16T08:58:50.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://blog.doubleword.ai/behind-the-stack-ep-9-how-to-evaluate-open-source-llms</loc>
<lastmod>2025-12-16T08:58:50.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://blog.doubleword.ai/understanding-chargeback-in-the-context-of-self-hosted-systems</loc>
<lastmod>2025-12-17T10:21:29.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://blog.doubleword.ai/behind-the-stack-ep-2-how-many-users-can-my-gpu-serve</loc>
<lastmod>2025-12-17T12:10:26.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://blog.doubleword.ai/behind-the-stack-ep-4-making-your-load-balancer-llm-aware</loc>
<lastmod>2025-12-17T12:10:28.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.8</priority>
</url>
</urlset>
