Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added paper_generation/architecture.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
35 changes: 35 additions & 0 deletions paper_generation/generate_architecture.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import os
import re
import base64
import requests

def extract_mermaid(readme_path):
    """Return the first ```mermaid fenced code block found in a Markdown file.

    Parameters
    ----------
    readme_path : str | os.PathLike
        Path to the Markdown file to scan (typically the repository README).

    Returns
    -------
    str | None
        The diagram source with surrounding whitespace stripped, or ``None``
        when no mermaid block is present.
    """
    with open(readme_path, "r", encoding="utf-8") as f:
        content = f.read()
    # Tolerate trailing spaces/tabs after the opening fence, and don't
    # require a newline immediately before the closing fence (the previous
    # pattern missed blocks whose body lacked a trailing newline).
    # DOTALL lets the body span lines; non-greedy stops at the first ```.
    match = re.search(r"```mermaid[ \t]*\n(.*?)```", content, re.DOTALL)
    if match:
        return match.group(1).strip()
    return None

def generate_diagram(mermaid_code, output_path):
    """Render mermaid source to a PNG via the mermaid.ink web service.

    Parameters
    ----------
    mermaid_code : str
        Mermaid diagram source (the fenced-block body, without the fences).
    output_path : str | os.PathLike
        Destination path for the fetched PNG.

    On any HTTP failure the error is printed rather than raised.
    """
    # Bug fix: mermaid.ink expects URL-safe base64 ("-"/"_"); standard
    # base64 can emit "+" and "/", which corrupt the URL path and cause
    # spurious 4xx responses for larger diagrams.
    encoded_graph = base64.urlsafe_b64encode(mermaid_code.encode("utf-8")).decode("utf-8")
    url = f"https://mermaid.ink/img/{encoded_graph}"
    print(f"Fetching diagram from: {url}")
    # A timeout prevents the script from hanging indefinitely on a stalled
    # connection (requests has no default timeout).
    response = requests.get(url, timeout=30)
    if response.status_code == 200:
        with open(output_path, "wb") as f:
            f.write(response.content)
        print(f"Successfully saved architecture diagram to {output_path}")
    else:
        print(f"Failed to fetch diagram. Status code: {response.status_code}")
        print(response.text)

if __name__ == "__main__":
    # The README lives one directory above this script; the rendered PNG
    # is written next to the script itself.
    script_dir = os.path.dirname(__file__)
    readme_path = os.path.join(os.path.dirname(script_dir), "README.md")
    output_path = os.path.join(script_dir, "architecture.png")

    mermaid_code = extract_mermaid(readme_path)
    if not mermaid_code:
        print("Mermaid diagram not found in README.md")
    else:
        generate_diagram(mermaid_code, output_path)
100 changes: 100 additions & 0 deletions paper_generation/generate_paper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
import os
import subprocess

def generate_latex(output_path):
    """Write the static AlphaStack paper source to *output_path* as UTF-8 LaTeX."""
    # The paper body is a fixed template; only the output location varies.
    paper_source = r"""\documentclass{article}
\usepackage[utf8]{inputenc}
\usepackage{graphicx}
\usepackage{hyperref}
\usepackage{geometry}
\geometry{a4paper, margin=1in}

\title{AlphaStack: Autonomous Code Generation using Multi-Agent Systems}
\author{HyperKuvid Labs}
\date{ICML 2026 Submission}

\begin{document}

\maketitle

\begin{abstract}
This paper presents AlphaStack, a novel approach to autonomous code generation using a multi-agent system designed for iterative self-healing and comprehensive validation. AlphaStack bridges the gap between natural language descriptions and production-ready codebases by employing specialized Planning and Correction agents. Through automated Docker-based validation and testing across diverse programming paradigms (CUDA, Go, Rust, TypeScript), the system achieves state-of-the-art results in creating robust software artifacts. Our empirical evaluation across four difficulty tiers demonstrates high success rates, proving the viability of autonomous programming agents in real-world scenarios.
\end{abstract}

\section{Introduction}
Software development requires translating abstract concepts into functional, syntax-correct, and logically sound code. Traditional code generation tools often fail at maintaining complex project structures and resolving dependency conflicts. AlphaStack introduces an intelligent multi-agent architecture capable of generating multi-file project structures, resolving dependency conflicts, and automatically validating the built codebase in sandboxed Docker environments. We demonstrate AlphaStack's capabilities through extensive evaluation against 40 challenging programming tasks ranging from simple utility scripts to complex concurrent and GPU-optimized systems.

\section{Methodology}
The core generation pipeline of AlphaStack is driven by a specialized multi-agent architecture:
\begin{itemize}
\item \textbf{Planning Agent:} Analyzes structural requirements and execution errors, generating comprehensive fix strategies using tool-augmented reasoning.
\item \textbf{Correction Agent:} Executes planned fixes while maintaining context-aware code understanding.
\end{itemize}

The system employs an iterative self-healing process. Once code is generated, a sandboxed Docker container builds and executes tests. Build errors or test failures trigger the Planning Agent to diagnose the issue and formulate a fix plan. The Correction Agent applies the necessary code modifications. This feedback loop continues until all tests pass or a maximum iteration limit is reached.

\section{Architecture Diagram}
The following diagram illustrates AlphaStack's end-to-end processing pipeline, transitioning from natural language input to a validated, production-ready project.

\begin{figure}[h]
\centering
\includegraphics[width=0.8\textwidth]{architecture.png}
\caption{AlphaStack Generation Pipeline}
\label{fig:architecture}
\end{figure}

\section{Results}
AlphaStack was evaluated against several frontier foundation models, including gpt-5.2, glm-5, minimaxm2.5, and claude sonnet 4.6, on standard benchmarks like HumanEval and MDDP. The evaluation demonstrates consistent and state-of-the-art performance, highlighting the effectiveness of the iterative multi-agent framework.

\begin{figure}[h]
\centering
\includegraphics[width=0.8\textwidth]{results.png}
\caption{Model Performance on HumanEval and MDDP Benchmarks}
\label{fig:results}
\end{figure}

\section{Conclusion}
We have introduced AlphaStack, an autonomous multi-agent code generation system. Through iterative self-healing, advanced context management, and Docker-based testing, AlphaStack significantly advances the capabilities of AI-driven software engineering. Future work will expand language support and address more complex, distributed system evaluations.

\section*{Supplementary Material}
Additional artifacts, full evaluation metrics, and the source code repository are available at the AlphaStack GitHub repository: \url{https://github.com/HyperKuvid-Labs/alpha-stack}.

\end{document}
"""
    with open(output_path, "w", encoding="utf-8") as tex_file:
        tex_file.write(paper_source)
    print(f"Successfully saved LaTeX file to {output_path}")

def compile_latex(tex_path):
    """Compile *tex_path* to PDF with pdflatex, running two passes.

    The second pass resolves cross-references (figure numbers, labels)
    that are only written to the ``.aux`` file during the first pass.
    All failures are reported on stdout rather than raised.

    Parameters
    ----------
    tex_path : str | os.PathLike
        Path to the ``.tex`` file; the PDF is produced in the same directory.
    """
    directory = os.path.dirname(tex_path)
    filename = os.path.basename(tex_path)
    # Bug fix: the message previously printed the literal "(unknown)"
    # instead of the file being compiled.
    print(f"Compiling {filename} in {directory}...")

    # Run pdflatex twice so references and formatting are fully resolved.
    pass_messages = (
        "First pass compilation successful.",
        "Second pass compilation successful. PDF generated.",
    )
    try:
        for message in pass_messages:
            subprocess.run(
                ["pdflatex", "-interaction=nonstopmode", filename],
                cwd=directory,
                check=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            )
            print(message)
    except FileNotFoundError as e:
        # pdflatex missing from PATH (or the working directory is invalid);
        # previously this escaped as an unhandled traceback.
        print("Failed to compile LaTeX to PDF.")
        print(f"pdflatex could not be run: {e}")
    except subprocess.CalledProcessError as e:
        print("Failed to compile LaTeX to PDF.")
        print(e.stdout.decode('utf-8'))
        print(e.stderr.decode('utf-8'))

if __name__ == "__main__":
    # Emit paper.tex next to this script, then compile it to a PDF.
    paper_path = os.path.join(os.path.dirname(__file__), "paper.tex")
    generate_latex(paper_path)
    compile_latex(paper_path)
52 changes: 52 additions & 0 deletions paper_generation/generate_results.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import os
import matplotlib.pyplot as plt
import numpy as np

def generate_results_graph(output_path):
    """Render a grouped bar chart of benchmark scores to *output_path* (300 dpi PNG).

    Parameters
    ----------
    output_path : str | os.PathLike
        Destination path for the chart image.

    NOTE: the scores below are dummy placeholder values, not measurements.
    """
    # Models to evaluate based on task description
    models = ['gpt-5.2', 'glm-5', 'minimaxm2.5', 'claude sonnet 4.6']

    # Dummy results for HumanEval and MDDP (invented, realistic-looking scores).
    humaneval_scores = [95.2, 92.4, 91.8, 94.7]
    mddp_scores = [91.5, 88.3, 89.0, 92.1]

    # Grouped bars: one group per model, two bars per group.
    x = np.arange(len(models))
    width = 0.35  # width of each bar

    fig, ax = plt.subplots(figsize=(10, 6))

    rects1 = ax.bar(x - width / 2, humaneval_scores, width, label='HumanEval', color='#4A90E2')
    rects2 = ax.bar(x + width / 2, mddp_scores, width, label='MDDP', color='#E74C3C')

    # Axis labels, title, and per-model tick labels.
    ax.set_ylabel('Score (%)')
    ax.set_title('Model Performance on HumanEval and MDDP Benchmarks')
    ax.set_xticks(x)
    ax.set_xticklabels(models)
    ax.legend(loc='lower right')

    def autolabel(rects):
        """Attach a text label above each bar in *rects*, displaying its height."""
        for rect in rects:
            height = rect.get_height()
            ax.annotate(f'{height:.1f}',
                        xy=(rect.get_x() + rect.get_width() / 2, height),
                        xytext=(0, 3),  # 3 points vertical offset
                        textcoords="offset points",
                        ha='center', va='bottom')

    autolabel(rects1)
    autolabel(rects2)

    fig.tight_layout()

    # Save via the figure object (equivalent to plt.savefig on the current
    # figure, but explicit), then close it so repeated calls do not
    # accumulate open figures in matplotlib's global state.
    fig.savefig(output_path, dpi=300)
    plt.close(fig)
    print(f"Successfully saved results graph to {output_path}")

if __name__ == "__main__":
    # Write the chart alongside this script.
    results_path = os.path.join(os.path.dirname(__file__), "results.png")
    generate_results_graph(results_path)
14 changes: 14 additions & 0 deletions paper_generation/paper.aux
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
\relax
\providecommand\hyper@newdestlabel[2]{}
\providecommand\HyField@AuxAddToFields[1]{}
\providecommand\HyField@AuxAddToCoFields[2]{}
\@writefile{toc}{\contentsline {section}{\numberline {1}Introduction}{1}{section.1}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {2}Methodology}{1}{section.2}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {3}Architecture Diagram}{1}{section.3}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces AlphaStack Generation Pipeline}}{1}{figure.1}\protected@file@percent }
\newlabel{fig:architecture}{{1}{1}{AlphaStack Generation Pipeline}{figure.1}{}}
\@writefile{toc}{\contentsline {section}{\numberline {4}Results}{2}{section.4}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces Model Performance on HumanEval and MDDP Benchmarks}}{2}{figure.2}\protected@file@percent }
\newlabel{fig:results}{{2}{2}{Model Performance on HumanEval and MDDP Benchmarks}{figure.2}{}}
\@writefile{toc}{\contentsline {section}{\numberline {5}Conclusion}{2}{section.5}\protected@file@percent }
\gdef \@abspage@last{2}
Loading