

<!DOCTYPE html>

<html lang="en">
<head>
  <!-- Document encoding and legacy IE rendering-mode hint. -->
  <meta charset="UTF-8">
  <meta http-equiv="X-UA-Compatible" content="IE=Edge">

  <!-- Theme stylesheets, loaded in this order: default theme, then the head-nav sheet. -->
  <link rel="stylesheet" href="/sgemm-optimization/assets/css/just-the-docs-default.css">
  <link rel="stylesheet" href="/sgemm-optimization/assets/css/just-the-docs-head-nav.css" id="jtd-head-nav-stylesheet">

  <!-- Inline rule: removes the background image from links inside .site-nav lists. -->
  <style id="jtd-nav-activation">
    .site-nav ul li a {
      background-image: none;
    }
  </style>

  <!-- Scripts load synchronously and in order: lunr first, then the theme script. -->
  <script src="/sgemm-optimization/assets/js/vendor/lunr.min.js"></script>
  <script src="/sgemm-optimization/assets/js/just-the-docs.js"></script>

  <meta name="viewport" content="width=device-width, initial-scale=1">

  <!--
    NOTE(review): the canonical link, og:url and the JSON-LD "url" below all end in
    just-the-docs-head-nav.css, which is the same path the second stylesheet link
    above requests. This looks like the stylesheet asset was rendered through the
    page layout; confirm against the site's layout defaults.
  -->
  <!-- Begin Jekyll SEO tag v2.8.0 -->
  <title>SGEMM Optimization | Learn CUDA SGEMM optimization from a readable baseline to Tensor Core WMMA, with verification, benchmarks, and compact engineering guidance.</title>
  <meta name="generator" content="Jekyll v3.10.0">
  <meta property="og:title" content="SGEMM Optimization">
  <meta name="author" content="LessUp">
  <meta property="og:locale" content="en">
  <meta name="description" content="Learn CUDA SGEMM optimization from a readable baseline to Tensor Core WMMA, with verification, benchmarks, and compact engineering guidance.">
  <meta property="og:description" content="Learn CUDA SGEMM optimization from a readable baseline to Tensor Core WMMA, with verification, benchmarks, and compact engineering guidance.">
  <link rel="canonical" href="https://lessup.github.io/sgemm-optimization/assets/css/just-the-docs-head-nav.css">
  <meta property="og:url" content="https://lessup.github.io/sgemm-optimization/assets/css/just-the-docs-head-nav.css">
  <meta property="og:site_name" content="SGEMM Optimization">
  <meta property="og:type" content="website">
  <meta name="twitter:card" content="summary">
  <meta property="twitter:title" content="SGEMM Optimization">
  <script type="application/ld+json">
{"@context":"https://schema.org","@type":"WebPage","author":{"@type":"Person","name":"LessUp"},"description":"Learn CUDA SGEMM optimization from a readable baseline to Tensor Core WMMA, with verification, benchmarks, and compact engineering guidance.","headline":"SGEMM Optimization","url":"https://lessup.github.io/sgemm-optimization/assets/css/just-the-docs-head-nav.css"}</script>
  <!-- End Jekyll SEO tag -->
</head>

<body>
  <!-- Jump link whose target is the element with id "main-content". -->
  <a class="skip-to-main" href="#main-content">Skip to main content</a>

  <!--
    Icon sprite. Each <symbol> is addressed by its id from <use> elements;
    the wrapper carries class "d-none" (presumably display:none in the theme CSS).
  -->
  <svg xmlns="http://www.w3.org/2000/svg" class="d-none">
    <symbol id="svg-link" viewBox="0 0 24 24">
      <title>Link</title>
      <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather feather-link">
        <path d="M10 13a5 5 0 0 0 7.54.54l3-3a5 5 0 0 0-7.07-7.07l-1.72 1.71"/>
        <path d="M14 11a5 5 0 0 0-7.54-.54l-3 3a5 5 0 0 0 7.07 7.07l1.71-1.71"/>
      </svg>
    </symbol>

    <symbol id="svg-menu" viewBox="0 0 24 24">
      <title>Menu</title>
      <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather feather-menu">
        <line x1="3" y1="12" x2="21" y2="12"/>
        <line x1="3" y1="6" x2="21" y2="6"/>
        <line x1="3" y1="18" x2="21" y2="18"/>
      </svg>
    </symbol>

    <symbol id="svg-arrow-right" viewBox="0 0 24 24">
      <title>Expand</title>
      <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather feather-chevron-right">
        <polyline points="9 18 15 12 9 6"/>
      </svg>
    </symbol>

    <!-- Feather. MIT License: https://github.com/feathericons/feather/blob/master/LICENSE -->
    <symbol id="svg-external-link" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather feather-external-link">
      <!-- This title's id is the aria-labelledby target used by the external-link icon. -->
      <title id="svg-external-link-title">(external link)</title>
      <path d="M18 13v6a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2V8a2 2 0 0 1 2-2h6"/>
      <polyline points="15 3 21 3 21 9"/>
      <line x1="10" y1="14" x2="21" y2="3"/>
    </symbol>

    <symbol id="svg-doc" viewBox="0 0 24 24">
      <title>Document</title>
      <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather feather-file">
        <path d="M13 2H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V9z"/>
        <polyline points="13 2 13 9 20 9"/>
      </svg>
    </symbol>

    <symbol id="svg-search" viewBox="0 0 24 24">
      <title>Search</title>
      <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather feather-search">
        <circle cx="11" cy="11" r="8"/>
        <line x1="21" y1="21" x2="16.65" y2="16.65"/>
      </svg>
    </symbol>

    <!-- Bootstrap Icons. MIT License: https://github.com/twbs/icons/blob/main/LICENSE.md -->
    <symbol id="svg-copy" viewBox="0 0 16 16">
      <title>Copy</title>
      <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-clipboard" viewBox="0 0 16 16">
        <path d="M4 1.5H3a2 2 0 0 0-2 2V14a2 2 0 0 0 2 2h10a2 2 0 0 0 2-2V3.5a2 2 0 0 0-2-2h-1v1h1a1 1 0 0 1 1 1V14a1 1 0 0 1-1 1H3a1 1 0 0 1-1-1V3.5a1 1 0 0 1 1-1h1v-1z"/>
        <path d="M9.5 1a.5.5 0 0 1 .5.5v1a.5.5 0 0 1-.5.5h-3a.5.5 0 0 1-.5-.5v-1a.5.5 0 0 1 .5-.5h3zm-3-1A1.5 1.5 0 0 0 5 1.5v1A1.5 1.5 0 0 0 6.5 4h3A1.5 1.5 0 0 0 11 2.5v-1A1.5 1.5 0 0 0 9.5 0h-3z"/>
      </svg>
    </symbol>

    <symbol id="svg-copied" viewBox="0 0 16 16">
      <title>Copied</title>
      <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" fill="currentColor" class="bi bi-clipboard-check-fill" viewBox="0 0 16 16">
        <path d="M6.5 0A1.5 1.5 0 0 0 5 1.5v1A1.5 1.5 0 0 0 6.5 4h3A1.5 1.5 0 0 0 11 2.5v-1A1.5 1.5 0 0 0 9.5 0h-3Zm3 1a.5.5 0 0 1 .5.5v1a.5.5 0 0 1-.5.5h-3a.5.5 0 0 1-.5-.5v-1a.5.5 0 0 1 .5-.5h3Z"/>
        <path d="M4 1.5H3a2 2 0 0 0-2 2V14a2 2 0 0 0 2 2h10a2 2 0 0 0 2-2V3.5a2 2 0 0 0-2-2h-1v1A2.5 2.5 0 0 1 9.5 5h-3A2.5 2.5 0 0 1 4 2.5v-1Zm6.854 7.354-3 3a.5.5 0 0 1-.708 0l-1.5-1.5a.5.5 0 0 1 .708-.708L7.5 10.793l2.646-2.647a.5.5 0 0 1 .708.708Z"/>
      </svg>
    </symbol>
  </svg>

  <div class="side-bar">
    <!-- Site banner: title link plus the menu toggle button. -->
    <div class="site-header" role="banner">
      <a href="/sgemm-optimization/" class="site-title lh-tight">
        SGEMM Optimization
      </a>
      <!-- type="button" is explicit so the control can never act as a form submit. -->
      <button type="button" id="menu-button" class="site-button btn-reset" aria-label="Toggle menu" aria-pressed="false">
        <svg viewBox="0 0 24 24" class="icon" aria-hidden="true"><use xlink:href="#svg-menu"></use></svg>
      </button>
    </div>

    <!-- Primary navigation. Only "Home" has child pages, so it is the only entry with an expander. -->
    <nav aria-label="Main" id="site-nav" class="site-nav">
      <ul class="nav-list">
        <li class="nav-list-item">
          <button type="button" class="nav-list-expander btn-reset" aria-label="toggle items in Home category" aria-pressed="false">
            <svg viewBox="0 0 24 24" aria-hidden="true"><use xlink:href="#svg-arrow-right"></use></svg>
          </button>
          <a href="/sgemm-optimization/" class="nav-list-link">Home</a>
          <ul class="nav-list">
            <li class="nav-list-item"><a href="/sgemm-optimization/docs/getting-started" class="nav-list-link">Getting Started</a></li>
            <li class="nav-list-item"><a href="/sgemm-optimization/docs/kernel-naive" class="nav-list-link">1. Naïve Kernel</a></li>
            <li class="nav-list-item"><a href="/sgemm-optimization/docs/kernel-tiled" class="nav-list-link">2. Tiled Kernel</a></li>
            <li class="nav-list-item"><a href="/sgemm-optimization/docs/kernel-bank-free" class="nav-list-link">3. Bank Conflict Free</a></li>
            <li class="nav-list-item"><a href="/sgemm-optimization/docs/kernel-double-buffer" class="nav-list-link">4. Double Buffer</a></li>
            <li class="nav-list-item"><a href="/sgemm-optimization/docs/kernel-tensor-core" class="nav-list-link">5. Tensor Core</a></li>
          </ul>
        </li>
        <li class="nav-list-item"><a href="/sgemm-optimization/docs/learning-path" class="nav-list-link">Learning Path</a></li>
        <li class="nav-list-item"><a href="/sgemm-optimization/docs/architecture" class="nav-list-link">Architecture</a></li>
        <li class="nav-list-item"><a href="/sgemm-optimization/docs/benchmark-results" class="nav-list-link">Benchmark Results</a></li>
        <!-- "Specifications" has no child pages, so it is a plain link: no expander button and no empty sub-list. -->
        <li class="nav-list-item"><a href="/sgemm-optimization/specs/" class="nav-list-link">Specifications</a></li>
        <li class="nav-list-item"><a href="/sgemm-optimization/CHANGELOG" class="nav-list-link">Changelog</a></li>
        <li class="nav-list-item"><a href="/sgemm-optimization/CONTRIBUTING" class="nav-list-link">Contributing</a></li>
        <li class="nav-list-item"><a href="/sgemm-optimization/LICENSE" class="nav-list-link">License</a></li>
        <!-- NOTE(review): several entries below share the same link text ("ADDED Requirements", "Context", "Motivation") while pointing at different pages. -->
        <li class="nav-list-item"><a href="/sgemm-optimization/openspec/changes/archive/2026-04-23-archive-ready-governance-cleanup/specs/architecture/spec.html" class="nav-list-link">ADDED Requirements</a></li>
        <li class="nav-list-item"><a href="/sgemm-optimization/openspec/changes/archive/2026-04-23-archive-ready-governance-cleanup/specs/project-presentation/spec.html" class="nav-list-link">ADDED Requirements</a></li>
        <li class="nav-list-item"><a href="/sgemm-optimization/openspec/changes/archive/2026-04-23-archive-ready-governance-cleanup/specs/repository-governance/spec.html" class="nav-list-link">ADDED Requirements</a></li>
        <li class="nav-list-item"><a href="/sgemm-optimization/openspec/changes/archive/2026-04-23-archive-ready-governance-cleanup/specs/testing/spec.html" class="nav-list-link">ADDED Requirements</a></li>
        <li class="nav-list-item"><a href="/sgemm-optimization/openspec/specs/architecture/spec.html" class="nav-list-link">Architecture Specification</a></li>
        <li class="nav-list-item"><a href="/sgemm-optimization/openspec/changes/archive/0001-core-architecture/design.html" class="nav-list-link">Context</a></li>
        <li class="nav-list-item"><a href="/sgemm-optimization/openspec/changes/archive/0002-implementation-roadmap/design.html" class="nav-list-link">Context</a></li>
        <li class="nav-list-item"><a href="/sgemm-optimization/openspec/changes/archive/2026-04-23-archive-ready-governance-cleanup/design.html" class="nav-list-link">Context</a></li>
        <li class="nav-list-item"><a href="/sgemm-optimization/openspec/specs/kernel/spec.html" class="nav-list-link">Kernel Specification</a></li>
        <li class="nav-list-item"><a href="/sgemm-optimization/openspec/changes/archive/2026-04-23-initial-migration/tasks.html" class="nav-list-link">Migration Tasks</a></li>
        <li class="nav-list-item"><a href="/sgemm-optimization/openspec/changes/archive/0001-core-architecture/proposal.html" class="nav-list-link">Motivation</a></li>
        <li class="nav-list-item"><a href="/sgemm-optimization/openspec/changes/archive/0002-implementation-roadmap/proposal.html" class="nav-list-link">Motivation</a></li>
        <li class="nav-list-item"><a href="/sgemm-optimization/openspec/AGENTS.html" class="nav-list-link">OpenSpec Agent Guide</a></li>
        <li class="nav-list-item"><a href="/sgemm-optimization/openspec/changes/archive/2026-04-23-archive-ready-governance-cleanup/tasks.html" class="nav-list-link">OpenSpec foundation</a></li>
        <li class="nav-list-item"><a href="/sgemm-optimization/openspec/" class="nav-list-link">OpenSpec Workflow for This Repository</a></li>
        <li class="nav-list-item"><a href="/sgemm-optimization/openspec/specs/project-presentation/spec.html" class="nav-list-link">Project Presentation Specification</a></li>
        <li class="nav-list-item"><a href="/sgemm-optimization/openspec/changes/archive/2026-04-23-initial-migration/proposal.html" class="nav-list-link">Proposal: Initial Migration to OpenSpec</a></li>
        <li class="nav-list-item"><a href="/sgemm-optimization/openspec/specs/repository-governance/spec.html" class="nav-list-link">Repository Governance Specification</a></li>
        <li class="nav-list-item"><a href="/sgemm-optimization/openspec/specs/testing/spec.html" class="nav-list-link">Testing Specification</a></li>
        <li class="nav-list-item"><a href="/sgemm-optimization/openspec/changes/archive/2026-04-23-archive-ready-governance-cleanup/proposal.html" class="nav-list-link">Why</a></li>
      </ul>

      <!-- External links; the icon is named via the sprite's "(external link)" title. -->
      <ul class="nav-list">
        <li class="nav-list-item external">
          <a href="https://github.com/LessUp/sgemm-optimization/tree/master/openspec" class="nav-list-link external">
            OpenSpec in repo
            <svg viewBox="0 0 24 24" aria-labelledby="svg-external-link-title"><use xlink:href="#svg-external-link"></use></svg>
          </a>
        </li>
      </ul>
    </nav>

    <footer class="site-footer">
      This site uses <a href="https://github.com/just-the-docs/just-the-docs">Just the Docs</a>, a documentation theme for Jekyll.
    </footer>
  </div>

  <div class="main" id="top">
    <div id="main-header" class="main-header">
      <!--
        Site search. The input takes its accessible name from aria-label; the
        label only carries the magnifier icon. #search-results starts empty
        (presumably filled by the theme script; confirm against just-the-docs.js).
      -->
      <div class="search" role="search">
        <div class="search-input-wrap">
          <input type="text" id="search-input" class="search-input" tabindex="0" placeholder="Search SGEMM Optimization" aria-label="Search SGEMM Optimization" autocomplete="off">
          <label for="search-input" class="search-label"><svg viewBox="0 0 24 24" class="search-icon"><use xlink:href="#svg-search"></use></svg></label>
        </div>
        <div id="search-results" class="search-results"></div>
      </div>

      <!--
        Auxiliary links. Both open in a new tab with rel="noopener noreferrer".
        URLs use an explicit https:// scheme rather than protocol-relative "//",
        so they resolve correctly regardless of how this page itself is served.
      -->
      <nav aria-label="Auxiliary" class="aux-nav">
        <ul class="aux-nav-list">
          <li class="aux-nav-list-item">
            <a href="https://github.com/LessUp/sgemm-optimization" class="site-button" target="_blank" rel="noopener noreferrer">
              GitHub
            </a>
          </li>
          <li class="aux-nav-list-item">
            <a href="https://github.com/LessUp/sgemm-optimization/blob/master/README.zh-CN.md" class="site-button" target="_blank" rel="noopener noreferrer">
              README (中文)
            </a>
          </li>
        </ul>
      </nav>
    </div>

    <div class="main-content-wrap">
      <!-- Content container; its id is the target of the "Skip to main content" link. -->
      <div id="main-content" class="main-content">
        <!-- No page body is present inside <main> in this rendered page. -->
        <main>
        </main>

        <hr>
        <!-- Page footer: link back to #top and the copyright/licence line. -->
        <footer>
          <p><a href="#top" id="back-to-top">Back to top</a></p>
          <p class="text-small text-grey-dk-100 mb-0">Copyright &copy; 2026 LessUp. Distributed under <a href="https://github.com/LessUp/sgemm-optimization/blob/master/LICENSE">MIT License</a>.</p>
        </footer>
      </div>
    </div>

    <!-- Empty overlay element associated with the search UI by its class name. -->
    <div class="search-overlay"></div>

    
  </div>

  
    





<script type="module">
  // Loads Mermaid 10.9.0 as an ES module from jsDelivr, initializes it with an
  // empty configuration object, then runs it against every element matching
  // the ".language-mermaid" selector.
  import mermaid from 'https://cdn.jsdelivr.net/npm/mermaid@10.9.0/dist/mermaid.esm.min.mjs';

  const mermaidConfig = {};
  const diagramSelector = '.language-mermaid';

  mermaid.initialize(mermaidConfig);
  mermaid.run({ querySelector: diagramSelector });
</script>



  
</body>
</html>

