bluebull
Team consisting of a Ublo software engineer and 42 Paris alumna specializing in React, TypeScript, and Python-based AI agentic workflows.
Project Description
Bluebull is a text-to-Polars code generation system built on Qwen2.5-Coder-7B-Instruct, a code-specialized 7B model quantized to 4-bit (MLX on Apple Silicon, bitsandbytes NF4 on CUDA) to reduce VRAM usage to ~4 GB.
Correctness (N):
A structured system prompt encodes 8 Polars-specific rules (e.g., .height not .count(), .dt.month() with parentheses, no ascending=True) alongside 5 hand-crafted few-shot examples targeting common LLM mistakes. A self-repair loop re-prompts the model with the error message on execution failure, automatically recovering many edge cases.
Latency (T):
Generation is capped at 200 tokens. The model loads in a background thread so the server responds immediately, and 4-bit quantization reduces both load time and memory bandwidth. The system scored 16/16 correct on the local eval set in ~39 seconds (N/T ≈ 0.41).
Stack:
FastAPI + Uvicorn, Polars 1.40, HuggingFace Transformers, MLX-LM, bitsandbytes, Python 3.12. Safe code execution with a 10-second timeout and restricted builtins.
from src.model import CodeGenerator
from src.evaluator import evaluate
# Script entry point: build the generator once and run it over the local
# evaluation set. The guard keeps the model from loading on a plain import.
if __name__ == "__main__":
    gen = CodeGenerator()
    evaluate(
        eval_set_path="data/eval_set.json",
        parquet_path="data/sales.parquet",
        generator=gen,
        # NOTE(review): presumably the number of self-repair re-prompts
        # allowed per question -- confirm against src.evaluator.
        max_retries=1,
    )
import platform
import sys
def _is_apple_silicon() -> bool:
    """Return True when running on macOS with an arm64 CPU."""
    return sys.platform == "darwin" and platform.machine() == "arm64"
class CodeGenerator:
def init(self, model_name: str | None = None):
if _is_apple_silicon():
self._init_mlx(model_name or “mlx-community/Qwen2.5-Coder-7B-Instruct-4bit”)
else:
self._init_transformers(model_name or “Qwen/Qwen2.5-Coder-7B-Instruct”)
def _init_mlx(self, model_name: str) -> None:
from mlx_lm import load
self._backend = "mlx"
print(f"Loading {model_name} (MLX)...")
self.model, self.tokenizer = load(model_name)
print("Model loaded.")
def _init_transformers(self, model_name: str) -> None:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
self._backend = "transformers"
print(f"Loading {model_name} (transformers)...")
self.tokenizer = AutoTokenizer.from_pretrained(model_name)
load_kwargs: dict = {"device_map": "auto"}
try:
from transformers import BitsAndBytesConfig
load_kwargs["quantization_config"] = BitsAndBytesConfig(
load_in_4bit=True,
bnb_4bit_compute_dtype=torch.bfloat16,
bnb_4bit_quant_type="nf4",
)
print("Using 4-bit quantization (bitsandbytes).")
except (ImportError, Exception):
load_kwargs["torch_dtype"] = torch.float16
print("bitsandbytes unavailable, loading in float16.")
self.model = AutoModelForCausalLM.from_pretrained(model_name, **load_kwargs)
print("Model loaded.")
def generate(self, prompt: str, max_tokens: int = 200) -> str:
if self._backend == "mlx":
return self._generate_mlx(prompt, max_tokens)
return self._generate_transformers(prompt, max_tokens)
def _generate_mlx(self, prompt: str, max_tokens: int) -> str:
from mlx_lm import generate as mlx_generate
messages = [{"role": "user", "content": prompt}]
formatted = self.tokenizer.apply_chat_template(
messages, tokenize=False, add_generation_prompt=True
)
output = mlx_generate(
self.model, self.tokenizer, prompt=formatted, max_tokens=max_tokens, verbose=False
)
return (
output.strip()
.replace("<|im_end|>", "")
.replace("<|endoftext|>", "")
.strip()
)
def _generate_transformers(self, prompt: str, max_tokens: int) -> str:
import torch
messages = [{"role": "user", "content": prompt}]
text = self.tokenizer.apply_chat_template(
messages, tokenize=False, add_generation_prompt=True
)
inputs = self.tokenizer(text, return_tensors="pt").to(self.model.device)
with torch.no_grad():
out = self.model.generate(
**inputs,
max_new_tokens=max_tokens,
do_sample=False,
temperature=None,
top_p=None,
pad_token_id=self.tokenizer.eos_token_id,
)
new_tokens = out[0][inputs["input_ids"].shape[1]:]
return self.tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
from src.model import CodeGenerator
from src.evaluator import evaluate
# Script entry point: build the generator once and run it over the local
# evaluation set. The guard keeps the model from loading on a plain import.
if __name__ == "__main__":
    gen = CodeGenerator()
    evaluate(
        eval_set_path="data/eval_set.json",
        parquet_path="data/sales.parquet",
        generator=gen,
        # NOTE(review): presumably the number of self-repair re-prompts
        # allowed per question -- confirm against src.evaluator.
        max_retries=1,
    )
# Instruction preamble placed at the top of every prompt. The code samples in
# it use plain ASCII quotes and dots so the model is only ever shown text that
# is valid Python.
SYSTEM_INSTRUCTION = """Generate ONLY valid Python Polars code.
df is loaded, pl is imported. No markdown, no explanation, no print.
Assign the final result to a variable named result.
Rules:
- Scalars: use .item() after .select() or .sort().head(1)[col].item()
- Row count: .height
- Date methods need parens: .dt.month(), .dt.year()
- Membership: .is_in([...]) not .isin()
- Sort DESCENDING (highest first): .sort('col', descending=True)
- NEVER use ascending=True — that argument does not exist in Polars
- String equality in filter: pl.col('x') == 'value'
"""
# Few-shot examples rendered into every prompt. Each entry pairs a schema
# description and a question with the Polars one-liner that answers it; the
# "code" strings use ASCII quotes so they are themselves valid Python.
FEW_SHOTS = [
    {
        "schema": "df: product (str), revenue (f64), country (str)",
        "question": "What is the total revenue in France?",
        "code": "result = df.filter(pl.col('country') == 'France').select(pl.col('revenue').sum()).item()",
    },
    {
        "schema": "df: product (str), revenue (f64)",
        "question": "Which product has the highest total revenue?",
        "code": "result = df.group_by('product').agg(pl.col('revenue').sum()).sort('revenue', descending=True).head(1)['product'].item()",
    },
    {
        "schema": "df: country (str), quantity (i64)",
        "question": "Which country has the lowest average quantity?",
        "code": "result = df.group_by('country').agg(pl.col('quantity').mean()).sort('quantity').head(1)['country'].item()",
    },
    {
        "schema": "df: date (date), revenue (f64)",
        "question": "Which month number has the highest total revenue?",
        "code": "result = df.group_by(pl.col('date').dt.month().alias('month')).agg(pl.col('revenue').sum()).sort('revenue', descending=True).head(1)['month'].item()",
    },
    {
        "schema": "df: date (date), amount (f64)",
        "question": "How many rows are in months 1 through 3?",
        "code": "result = df.filter(pl.col('date').dt.month().is_in([1, 2, 3])).height",
    },
]
def build_prompt(schema: str, question: str) -> str:
    """Assemble the full generation prompt for one question.

    The prompt is the system instruction, then every few-shot example
    rendered as a ``Schema / Q / Code`` triple, then the target schema and
    question followed by an open ``Code:`` line for the model to complete.

    Args:
        schema: Description of the dataframe columns for this question.
        question: Natural-language question to answer.

    Returns:
        The prompt text, ending in ``"Code:"``.
    """
    shots = "\n\n".join(
        f"Schema: {s['schema']}\nQ: {s['question']}\nCode: {s['code']}"
        for s in FEW_SHOTS
    )
    return (
        f"{SYSTEM_INSTRUCTION}\n\n"
        f"Examples:\n{shots}\n\n"
        f"Schema: {schema}\nQ: {question}\nCode:"
    )