Autoresearch implementation for testing

This commit is contained in:
Andrej Mickov
2026-03-24 10:37:51 +01:00
parent 4a435bf13d
commit a0086da16b
12 changed files with 1874 additions and 330 deletions

5
.gitignore vendored
View File

@@ -1,5 +1,8 @@
.env
.venv/
__pycache__/
ships-aerial-images/
runs*/
*.pt
*.pt
results.tsv
run.log

View File

@@ -1,305 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "7f263647",
"metadata": {},
"source": [
"# Laboratory Exercise 5"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "448199f3",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: ultralytics in ./.venv/lib/python3.12/site-packages (8.3.159)\n",
"Requirement already satisfied: numpy>=1.23.0 in ./.venv/lib/python3.12/site-packages (from ultralytics) (2.3.1)\n",
"Requirement already satisfied: matplotlib>=3.3.0 in ./.venv/lib/python3.12/site-packages (from ultralytics) (3.10.3)\n",
"Requirement already satisfied: opencv-python>=4.6.0 in ./.venv/lib/python3.12/site-packages (from ultralytics) (4.11.0.86)\n",
"Requirement already satisfied: pillow>=7.1.2 in ./.venv/lib/python3.12/site-packages (from ultralytics) (11.2.1)\n",
"Requirement already satisfied: pyyaml>=5.3.1 in ./.venv/lib/python3.12/site-packages (from ultralytics) (6.0.2)\n",
"Requirement already satisfied: requests>=2.23.0 in ./.venv/lib/python3.12/site-packages (from ultralytics) (2.32.4)\n",
"Requirement already satisfied: scipy>=1.4.1 in ./.venv/lib/python3.12/site-packages (from ultralytics) (1.16.0)\n",
"Requirement already satisfied: torch>=1.8.0 in ./.venv/lib/python3.12/site-packages (from ultralytics) (2.7.1)\n",
"Requirement already satisfied: torchvision>=0.9.0 in ./.venv/lib/python3.12/site-packages (from ultralytics) (0.22.1)\n",
"Requirement already satisfied: tqdm>=4.64.0 in ./.venv/lib/python3.12/site-packages (from ultralytics) (4.67.1)\n",
"Requirement already satisfied: psutil in ./.venv/lib/python3.12/site-packages (from ultralytics) (7.0.0)\n",
"Requirement already satisfied: py-cpuinfo in ./.venv/lib/python3.12/site-packages (from ultralytics) (9.0.0)\n",
"Requirement already satisfied: pandas>=1.1.4 in ./.venv/lib/python3.12/site-packages (from ultralytics) (2.3.0)\n",
"Requirement already satisfied: ultralytics-thop>=2.0.0 in ./.venv/lib/python3.12/site-packages (from ultralytics) (2.0.14)\n",
"Requirement already satisfied: contourpy>=1.0.1 in ./.venv/lib/python3.12/site-packages (from matplotlib>=3.3.0->ultralytics) (1.3.2)\n",
"Requirement already satisfied: cycler>=0.10 in ./.venv/lib/python3.12/site-packages (from matplotlib>=3.3.0->ultralytics) (0.12.1)\n",
"Requirement already satisfied: fonttools>=4.22.0 in ./.venv/lib/python3.12/site-packages (from matplotlib>=3.3.0->ultralytics) (4.58.4)\n",
"Requirement already satisfied: kiwisolver>=1.3.1 in ./.venv/lib/python3.12/site-packages (from matplotlib>=3.3.0->ultralytics) (1.4.8)\n",
"Requirement already satisfied: packaging>=20.0 in ./.venv/lib/python3.12/site-packages (from matplotlib>=3.3.0->ultralytics) (25.0)\n",
"Requirement already satisfied: pyparsing>=2.3.1 in ./.venv/lib/python3.12/site-packages (from matplotlib>=3.3.0->ultralytics) (3.2.3)\n",
"Requirement already satisfied: python-dateutil>=2.7 in ./.venv/lib/python3.12/site-packages (from matplotlib>=3.3.0->ultralytics) (2.9.0.post0)\n",
"Requirement already satisfied: pytz>=2020.1 in ./.venv/lib/python3.12/site-packages (from pandas>=1.1.4->ultralytics) (2025.2)\n",
"Requirement already satisfied: tzdata>=2022.7 in ./.venv/lib/python3.12/site-packages (from pandas>=1.1.4->ultralytics) (2025.2)\n",
"Requirement already satisfied: charset_normalizer<4,>=2 in ./.venv/lib/python3.12/site-packages (from requests>=2.23.0->ultralytics) (3.4.2)\n",
"Requirement already satisfied: idna<4,>=2.5 in ./.venv/lib/python3.12/site-packages (from requests>=2.23.0->ultralytics) (3.10)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in ./.venv/lib/python3.12/site-packages (from requests>=2.23.0->ultralytics) (2.5.0)\n",
"Requirement already satisfied: certifi>=2017.4.17 in ./.venv/lib/python3.12/site-packages (from requests>=2.23.0->ultralytics) (2025.6.15)\n",
"Requirement already satisfied: filelock in ./.venv/lib/python3.12/site-packages (from torch>=1.8.0->ultralytics) (3.18.0)\n",
"Requirement already satisfied: typing-extensions>=4.10.0 in ./.venv/lib/python3.12/site-packages (from torch>=1.8.0->ultralytics) (4.14.0)\n",
"Requirement already satisfied: setuptools in ./.venv/lib/python3.12/site-packages (from torch>=1.8.0->ultralytics) (80.9.0)\n",
"Requirement already satisfied: sympy>=1.13.3 in ./.venv/lib/python3.12/site-packages (from torch>=1.8.0->ultralytics) (1.14.0)\n",
"Requirement already satisfied: networkx in ./.venv/lib/python3.12/site-packages (from torch>=1.8.0->ultralytics) (3.5)\n",
"Requirement already satisfied: jinja2 in ./.venv/lib/python3.12/site-packages (from torch>=1.8.0->ultralytics) (3.1.6)\n",
"Requirement already satisfied: fsspec in ./.venv/lib/python3.12/site-packages (from torch>=1.8.0->ultralytics) (2025.5.1)\n",
"Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.6.77 in ./.venv/lib/python3.12/site-packages (from torch>=1.8.0->ultralytics) (12.6.77)\n",
"Requirement already satisfied: nvidia-cuda-runtime-cu12==12.6.77 in ./.venv/lib/python3.12/site-packages (from torch>=1.8.0->ultralytics) (12.6.77)\n",
"Requirement already satisfied: nvidia-cuda-cupti-cu12==12.6.80 in ./.venv/lib/python3.12/site-packages (from torch>=1.8.0->ultralytics) (12.6.80)\n",
"Requirement already satisfied: nvidia-cudnn-cu12==9.5.1.17 in ./.venv/lib/python3.12/site-packages (from torch>=1.8.0->ultralytics) (9.5.1.17)\n",
"Requirement already satisfied: nvidia-cublas-cu12==12.6.4.1 in ./.venv/lib/python3.12/site-packages (from torch>=1.8.0->ultralytics) (12.6.4.1)\n",
"Requirement already satisfied: nvidia-cufft-cu12==11.3.0.4 in ./.venv/lib/python3.12/site-packages (from torch>=1.8.0->ultralytics) (11.3.0.4)\n",
"Requirement already satisfied: nvidia-curand-cu12==10.3.7.77 in ./.venv/lib/python3.12/site-packages (from torch>=1.8.0->ultralytics) (10.3.7.77)\n",
"Requirement already satisfied: nvidia-cusolver-cu12==11.7.1.2 in ./.venv/lib/python3.12/site-packages (from torch>=1.8.0->ultralytics) (11.7.1.2)\n",
"Requirement already satisfied: nvidia-cusparse-cu12==12.5.4.2 in ./.venv/lib/python3.12/site-packages (from torch>=1.8.0->ultralytics) (12.5.4.2)\n",
"Requirement already satisfied: nvidia-cusparselt-cu12==0.6.3 in ./.venv/lib/python3.12/site-packages (from torch>=1.8.0->ultralytics) (0.6.3)\n",
"Requirement already satisfied: nvidia-nccl-cu12==2.26.2 in ./.venv/lib/python3.12/site-packages (from torch>=1.8.0->ultralytics) (2.26.2)\n",
"Requirement already satisfied: nvidia-nvtx-cu12==12.6.77 in ./.venv/lib/python3.12/site-packages (from torch>=1.8.0->ultralytics) (12.6.77)\n",
"Requirement already satisfied: nvidia-nvjitlink-cu12==12.6.85 in ./.venv/lib/python3.12/site-packages (from torch>=1.8.0->ultralytics) (12.6.85)\n",
"Requirement already satisfied: nvidia-cufile-cu12==1.11.1.6 in ./.venv/lib/python3.12/site-packages (from torch>=1.8.0->ultralytics) (1.11.1.6)\n",
"Requirement already satisfied: triton==3.3.1 in ./.venv/lib/python3.12/site-packages (from torch>=1.8.0->ultralytics) (3.3.1)\n",
"Requirement already satisfied: six>=1.5 in ./.venv/lib/python3.12/site-packages (from python-dateutil>=2.7->matplotlib>=3.3.0->ultralytics) (1.17.0)\n",
"Requirement already satisfied: mpmath<1.4,>=1.1.0 in ./.venv/lib/python3.12/site-packages (from sympy>=1.13.3->torch>=1.8.0->ultralytics) (1.3.0)\n",
"Requirement already satisfied: MarkupSafe>=2.0 in ./.venv/lib/python3.12/site-packages (from jinja2->torch>=1.8.0->ultralytics) (3.0.2)\n"
]
}
],
"source": [
"!pip install ultralytics"
]
},
{
"cell_type": "markdown",
"id": "41b20cd9",
"metadata": {},
"source": [
"### Dataset\n",
"- **Source** : Kaggle\n",
"- **Format** : Images + annotations\n",
"- **Classes**: 1 (`ship`)\n",
"- **Resolution per image**: Typically 640x640\n",
"- **Dataset size**: 26900 pictures\n"
]
},
{
"cell_type": "markdown",
"id": "3f72c4c8",
"metadata": {},
"source": [
"#### Importing dataset and pre-trained model"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "dab37f87",
"metadata": {},
"outputs": [],
"source": [
"from ultralytics import YOLO\n",
"\n",
"model = YOLO(\"yolo11l.pt\")\n",
"\n",
"data_path = 'ships-aerial-images/data.yaml'"
]
},
{
"cell_type": "markdown",
"id": "9a15a1f5",
"metadata": {},
"source": [
"### Final training parameters after a couple of iterations"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "33e8f858",
"metadata": {},
"outputs": [],
"source": [
"train_params = {\n",
" 'epochs': 40,\n",
" 'batch': 32,\n",
" 'imgsz': 640,\n",
" 'lr0': 5e-4,\n",
" 'lrf': 0.1,\n",
" 'warmup_epochs': 5,\n",
" 'warmup_bias_lr': 1e-6,\n",
" 'momentum': 0.937,\n",
" 'weight_decay': 0.0001,\n",
" 'optimizer': 'AdamW',\n",
" 'device': '0,1',\n",
" 'project': 'runs/train',\n",
" 'name': 'vessel_deteciton_v11l',\n",
" 'exist_ok': True,\n",
" 'save_period': 2,\n",
" 'workers': 8,\n",
" 'patience': 20, \n",
" 'cos_lr': True, \n",
"}\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "940aca02",
"metadata": {},
"outputs": [],
"source": [
"model.train(data=data_path, **train_params)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "55e7e6a4",
"metadata": {},
"outputs": [],
"source": [
"model = YOLO(\"runs/train/vessel_deteciton_v11l/weights/best.pt\")"
]
},
{
"cell_type": "markdown",
"id": "05a6fd1f",
"metadata": {},
"source": [
"### Validation"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "cf1f9cdb",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Ultralytics 8.3.159 🚀 Python-3.12.3 torch-2.7.1+cu126 CUDA:0 (NVIDIA GeForce RTX 3090, 24135MiB)\n",
" CUDA:1 (NVIDIA GeForce RTX 3090, 24135MiB)\n",
"\u001b[34m\u001b[1mval: \u001b[0mFast image access ✅ (ping: 0.0±0.0 ms, read: 106.4±87.7 MB/s, size: 11.8 KB)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[34m\u001b[1mval: \u001b[0mScanning /home/mlmonster/Projects/ferdzo/vesselDetection/ships-aerial-images/valid/labels.cache... 2165 images, 68 backgrounds, 0 corrupt: 100%|██████████| 2165/2165 [00:00<?, ?it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"WARNING ⚠️ Box and segment counts should be equal, but got len(segments) = 172, len(boxes) = 3720. To resolve this only boxes will be used and all segments will be removed. To avoid this please supply either a detect or segment dataset, not a detect-segment mixed dataset.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
" Class Images Instances Box(P R mAP50 mAP50-95): 100%|██████████| 136/136 [00:15<00:00, 8.74it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
" all 2165 3720 0.603 0.543 0.551 0.341\n",
"Speed: 0.2ms preprocess, 6.2ms inference, 0.0ms loss, 0.2ms postprocess per image\n",
"Results saved to \u001b[1m/home/mlmonster/Projects/ferdzo/vesselDetection/runs/detect/val4\u001b[0m\n"
]
}
],
"source": [
"validation = model.val(conf=0.01,iou=0.7, max_det=300, imgsz=640, device='0,1')"
]
},
{
"cell_type": "markdown",
"id": "124cb886",
"metadata": {},
"source": [
"### Testing the model on custom images"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "43560cd1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"image 1/1 /home/mlmonster/Projects/ferdzo/vesselDetection/5af55.jpg: 640x640 2 ships, 20.7ms\n",
"Speed: 21.6ms preprocess, 20.7ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)\n",
"Predictions: tensor([[727.0521, 301.3238, 749.1639, 321.9431],\n",
" [631.3250, 203.8833, 668.7556, 220.2926]], device='cuda:0')\n",
"Confidence: tensor([0.7513, 0.3399], device='cuda:0')\n",
"Class IDs: tensor([0., 0.], device='cuda:0')\n",
"Number of detections: 2\n"
]
}
],
"source": [
"test_image = \"/home/mlmonster/Projects/ferdzo/vesselDetection/5af55.jpg\"\n",
"results = model(test_image)\n",
"\n",
"for result in results:\n",
" print(f\"Predictions: {result.boxes.xyxy}\")\n",
" print(f\"Confidence: {result.boxes.conf}\")\n",
" print(f\"Class IDs: {result.boxes.cls}\")\n",
" print(f\"Number of detections: {len(result.boxes)}\") \n",
" result.save()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6c42e373",
"metadata": {},
"outputs": [],
"source": [
"model.export(format='onnx')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

1
.python-version Normal file
View File

@@ -0,0 +1 @@
3.13

View File

@@ -1,4 +1,42 @@
# vesselDetection
### Ship detection using Machine Learning methods for the course Digital Processing of Image(Дигитално Процесирање на Слика)
This is a simple ship detection model, made using pre-trained YOLO with COCO weights.
Ship detection using YOLO for the course Digital Processing of Image (Дигитално Процесирање на Слика).
This repo now includes a lightweight `autoresearch`-style workflow adapted from `karpathy/autoresearch`: the idea is to let an AI agent iterate on `train.py`, run short fixed-budget experiments, and keep only changes that improve validation quality.
## Files that matter
- `prepare.py` - fixed utilities for dataset checks, runtime overrides, and metric extraction
- `train.py` - the single training file the agent edits
- `program.md` - instructions for the research agent
## Metric
The primary objective is `metrics/mAP50-95(B)` from Ultralytics validation results. Higher is better.
## Setup
Install dependencies with `uv`, make sure the dataset YAML exists at `ships-aerial-images/data.yaml`, then run:
```bash
uv sync
```
## Training
Run the baseline or any experiment with:
```bash
uv run train.py
```
By default, the training script uses a fixed 5-minute budget through the Ultralytics `time` argument and prints a compact summary at the end so an agent can compare runs automatically.
## Autoresearch loop
1. Create a fresh branch such as `autoresearch/mar24`
2. Read `program.md`
3. Run a baseline with `uv run train.py > run.log 2>&1`
4. Iterate only on `train.py`
5. Log outcomes to `results.tsv`
6. Keep only commits that improve `metrics/mAP50-95(B)`

165
prepare.py Normal file
View File

@@ -0,0 +1,165 @@
from __future__ import annotations

import csv
import math
import os
from pathlib import Path
DEFAULT_DATA_PATH = Path("ships-aerial-images/data.yaml")
DEFAULT_PROJECT_DIR = Path("runs/autoresearch")
DEFAULT_TIME_HOURS = 5 / 60
PRIMARY_METRIC_KEY = "metrics/mAP50-95(B)"
def ensure_dataset_exists(data_path: Path) -> None:
if not data_path.exists():
raise FileNotFoundError(
f"Dataset config not found at '{data_path}'. Set YOLO_DATA or add the dataset before training."
)
def env_bool(name: str, default: bool) -> bool:
value = os.getenv(name)
if value is None:
return default
return value.strip().lower() in {"1", "true", "yes", "on"}
def build_train_kwargs(defaults: dict[str, object]) -> dict[str, object]:
kwargs = dict(defaults)
kwargs["project"] = os.getenv("YOLO_PROJECT", str(kwargs["project"]))
kwargs["name"] = os.getenv("YOLO_RUN_NAME", str(kwargs["name"]))
kwargs["exist_ok"] = env_bool("YOLO_EXIST_OK", bool(kwargs.get("exist_ok", True)))
time_override = os.getenv("YOLO_TIME_HOURS")
if time_override:
kwargs["time"] = float(time_override)
device_override = os.getenv("YOLO_DEVICE")
if device_override:
kwargs["device"] = device_override
return kwargs
def resolve_save_dir(
project_dir: Path, run_name: str, expected_save_dir: Path | None = None
) -> Path:
candidates: list[Path] = []
if expected_save_dir is not None:
candidates.append(expected_save_dir)
candidates.append(project_dir / run_name)
for candidate in candidates:
if (candidate / "results.csv").exists():
return candidate
matches = sorted(
(path for path in project_dir.glob(f"{run_name}*") if path.is_dir()),
key=lambda path: path.stat().st_mtime,
reverse=True,
)
for match in matches:
if (match / "results.csv").exists():
return match
return expected_save_dir or (project_dir / run_name)
def _to_float(value: str | None) -> float | None:
if value in {None, "", "nan", "None"}:
return None
try:
return float(value)
except ValueError:
return None
def _first_float(
row: dict[str, str], keys: list[str]
) -> tuple[str | None, float | None]:
for key in keys:
if key in row:
value = _to_float(row.get(key))
if value is not None:
return key, value
return None, None
def extract_experiment_summary(
save_dir: Path,
elapsed_seconds: float,
peak_vram_mb: float,
data_path: Path,
model_name: str,
) -> dict[str, object]:
results_csv = save_dir / "results.csv"
if not results_csv.exists():
raise FileNotFoundError(f"Expected training metrics at '{results_csv}'.")
with results_csv.open("r", encoding="utf-8", newline="") as handle:
rows = list(csv.DictReader(handle))
if not rows:
raise RuntimeError(f"Training metrics file '{results_csv}' is empty.")
last_row = rows[-1]
fitness_key, fitness = _first_float(
last_row, [PRIMARY_METRIC_KEY, "metrics/mAP50(B)", "metrics/precision(B)"]
)
_, precision = _first_float(last_row, ["metrics/precision(B)"])
_, recall = _first_float(last_row, ["metrics/recall(B)"])
_, map50 = _first_float(last_row, ["metrics/mAP50(B)"])
_, map50_95 = _first_float(last_row, [PRIMARY_METRIC_KEY])
_, epoch = _first_float(last_row, ["epoch"])
best_weights = save_dir / "weights/best.pt"
last_weights = save_dir / "weights/last.pt"
return {
"fitness_key": fitness_key or PRIMARY_METRIC_KEY,
"fitness": fitness,
"precision": precision,
"recall": recall,
"map50": map50,
"map50_95": map50_95,
"epoch": epoch,
"training_seconds": elapsed_seconds,
"total_seconds": elapsed_seconds,
"peak_vram_mb": peak_vram_mb,
"data_path": str(data_path),
"model_name": model_name,
"save_dir": str(save_dir),
"results_csv": str(results_csv),
"best_weights": str(best_weights),
"best_weights_exists": best_weights.exists(),
"last_weights": str(last_weights),
"last_weights_exists": last_weights.exists(),
}
def _format_metric(value: float | None, digits: int = 6) -> str:
if value is None:
return "n/a"
return f"{value:.{digits}f}"
def print_experiment_summary(summary: dict[str, object]) -> None:
print("---")
print(f"fitness_key: {summary['fitness_key']}")
print(f"fitness: {_format_metric(summary['fitness'])}")
print(f"training_seconds: {_format_metric(summary['training_seconds'], digits=1)}")
print(f"total_seconds: {_format_metric(summary['total_seconds'], digits=1)}")
print(f"peak_vram_mb: {_format_metric(summary['peak_vram_mb'], digits=1)}")
print(f"precision: {_format_metric(summary['precision'])}")
print(f"recall: {_format_metric(summary['recall'])}")
print(f"map50: {_format_metric(summary['map50'])}")
print(f"map50_95: {_format_metric(summary['map50_95'])}")
print(f"epoch: {_format_metric(summary['epoch'], digits=0)}")
print(f"data_path: {summary['data_path']}")
print(f"model: {summary['model_name']}")
print(f"save_dir: {summary['save_dir']}")
print(f"results_csv: {summary['results_csv']}")
print(f"best_weights: {summary['best_weights']}")
print(f"best_weights_ok: {str(summary['best_weights_exists']).lower()}")
print(f"last_weights: {summary['last_weights']}")
print(f"last_weights_ok: {str(summary['last_weights_exists']).lower()}")

115
program.md Normal file
View File

@@ -0,0 +1,115 @@
# autoresearch
This is an experiment to have the LLM do its own research.
## Setup
To set up a new experiment, work with the user to:
1. **Agree on a run tag**: propose a tag based on today's date (e.g. `mar24`). The branch `autoresearch/<tag>` must not already exist — this is a fresh run.
2. **Create the branch**: `git checkout -b autoresearch/<tag>` from current master.
3. **Read the in-scope files**: The repo is small. Read these files for full context:
- `README.md` — repository context.
- `prepare.py` — fixed runtime utilities, summary extraction, and dataset checks. Do not modify.
- `train.py` — the file you modify. Model choice, optimizer, hyperparameters, image size, and training loop entrypoint all live here.
4. **Verify data exists**: Check that `ships-aerial-images/data.yaml` exists, or that `YOLO_DATA` points to a valid dataset YAML. If not, tell the human to add the dataset first.
5. **Initialize results.tsv**: Create `results.tsv` with just the header row. The baseline will be recorded after the first run.
6. **Confirm and go**: Confirm setup looks good.
Once you get confirmation, kick off the experimentation.
## Experimentation
Each experiment runs through `uv run train.py`.
The training script uses a **fixed 5-minute time budget** through Ultralytics' `time` argument, so experiments are approximately comparable and always short enough to iterate quickly.
**What you CAN do:**
- Modify `train.py` — this is the only file you edit. Everything there is fair game: model size, model weights, image size, batch size, optimizer, learning rate schedule, augmentation knobs, worker count, freeze settings, and similar training parameters.
**What you CANNOT do:**
- Modify `prepare.py`. It is read-only.
- Install new packages or add dependencies. You can only use what's already in `pyproject.toml`.
- Modify the evaluation harness outside the normal Ultralytics validation outputs produced by the training run.
**The goal is simple: get the highest `metrics/mAP50-95(B)`.** Higher is better. Since the time budget is fixed, the core job is to find the best-performing experiment under that fixed budget.
**VRAM** is a soft constraint. Some increase is acceptable for meaningful gains, but avoid ideas that blow up memory or make experiments fragile.
**Simplicity criterion**: All else being equal, simpler is better. A tiny gain that adds ugly complexity is usually not worth it. Removing complexity while keeping equal or better quality is a win.
**The first run**: Your very first run should always be the baseline, so run the training script as is before changing anything.
## Output format
Once the script finishes it prints a summary like this:
```
---
fitness_key: metrics/mAP50-95(B)
fitness: 0.612345
training_seconds: 300.1
total_seconds: 300.1
peak_vram_mb: 8240.5
precision: 0.801234
recall: 0.745678
map50: 0.822222
map50_95: 0.612345
epoch: 18
```
You can extract the key metric from the log file with:
```
grep "^fitness:\|^peak_vram_mb:" run.log
```
## Logging results
When an experiment is done, log it to `results.tsv` (tab-separated, NOT comma-separated — commas break descriptions).
The TSV has a header row and 5 columns:
```
commit metric memory_gb status description
```
1. git commit hash (short, 7 chars)
2. `metrics/mAP50-95(B)` achieved (e.g. 0.612345) — use `0.000000` for crashes
3. peak memory in GB, round to `.1f` (divide `peak_vram_mb` by 1024) — use `0.0` for crashes
4. status: `keep`, `discard`, or `crash`
5. short text description of what the experiment tried
Example:
```
commit metric memory_gb status description
a1b2c3d 0.612345 8.1 keep baseline yolo11l 640 adamw
b2c3d4e 0.618901 9.4 keep increase image size to 768
c3d4e5f 0.605100 7.9 discard reduce batch and switch optimizer
d4e5f6g 0.000000 0.0 crash batch too large caused OOM
```
## The experiment loop
The experiment runs on a dedicated branch (e.g. `autoresearch/mar24`).
LOOP FOREVER:
1. Look at the git state: the current branch and commit.
2. Tune `train.py` with one experimental idea.
3. git commit
4. Run the experiment: `uv run train.py > run.log 2>&1`
5. Read out the results: `grep "^fitness:\|^peak_vram_mb:" run.log`
6. If the grep output is empty, the run crashed. Read the traceback from `run.log`, attempt a fix if it is easy, otherwise mark it as a crash and move on.
7. Record the result in `results.tsv` (do not commit `results.tsv`; leave it untracked).
8. If the metric improved, keep the commit.
9. If the metric is equal or worse, reset back to where you started.
The idea is that you are a completely autonomous researcher trying things out. If they work, keep. If they don't, discard. Advance the branch only with improvements.
**Timeout**: Each experiment should take about 5 minutes total, plus a small amount of overhead. If a run exceeds 10 minutes, kill it and treat it as a failure.
**Crashes**: If a run crashes (OOM, bad hyperparameters, a typo, etc.), use judgment. If it is something dumb and easy to fix, fix it and re-run. If the idea is fundamentally broken, log it as `crash` and move on.
**NEVER STOP**: Once the experiment loop has begun, do not pause to ask whether you should continue. Keep going until the human interrupts you.

12
pyproject.toml Normal file
View File

@@ -0,0 +1,12 @@
[project]
name = "vesseldetection"
version = "0.1.0"
description = "Autoresearch-style YOLO vessel detection experiments"
readme = "README.md"
requires-python = ">=3.10,<3.14"
dependencies = [
"ultralytics>=8.3.0",
]
[tool.uv]
package = false

Binary file not shown.

Before

Width:  |  Height:  |  Size: 336 KiB

BIN
slika.jpg

Binary file not shown.

Before

Width:  |  Height:  |  Size: 292 KiB

BIN
test.jpg

Binary file not shown.

Before

Width:  |  Height:  |  Size: 254 KiB

105
train.py
View File

@@ -1,28 +1,89 @@
from __future__ import annotations
import os
import time
from pathlib import Path
import torch
from ultralytics import YOLO
model = YOLO("yolo11l.pt")
from prepare import (
DEFAULT_DATA_PATH,
DEFAULT_PROJECT_DIR,
DEFAULT_TIME_HOURS,
build_train_kwargs,
ensure_dataset_exists,
extract_experiment_summary,
print_experiment_summary,
resolve_save_dir,
)
data_path = 'ships-aerial-images/data.yaml'
# The agent is expected to iterate on this file only.
MODEL_WEIGHTS = os.getenv("YOLO_MODEL", "yolo11l.pt")
DATA_PATH = Path(os.getenv("YOLO_DATA", str(DEFAULT_DATA_PATH)))
RUN_NAME = "vessel_detection_yolo11l"
train_params = {
'epochs': 40,
'batch': 32,
'imgsz': 640,
'lr0': 5e-4,
'lrf': 0.1,
'warmup_epochs': 5,
'warmup_bias_lr': 1e-6,
'momentum': 0.937,
'weight_decay': 0.0001,
'optimizer': 'AdamW',
'device': '0,1',
'project': 'runs/train',
'name': 'vessel_deteciton_v11l',
'exist_ok': True,
'save_period': 2,
'workers': 8,
'patience': 20,
'cos_lr': True,
TRAIN_PARAMS = {
"epochs": 40,
"time": DEFAULT_TIME_HOURS,
"batch": 32,
"imgsz": 640,
"lr0": 5e-4,
"lrf": 0.1,
"warmup_epochs": 5,
"warmup_bias_lr": 1e-6,
"momentum": 0.937,
"weight_decay": 1e-4,
"optimizer": "AdamW",
"device": "0",
"project": str(DEFAULT_PROJECT_DIR),
"name": RUN_NAME,
"exist_ok": True,
"save_period": 2,
"workers": 8,
"patience": 20,
"cos_lr": True,
"seed": 42,
"deterministic": True,
"plots": False,
}
model.train(data=data_path, **train_params)
def main() -> None:
ensure_dataset_exists(DATA_PATH)
train_kwargs = build_train_kwargs(TRAIN_PARAMS)
save_dir = Path(str(train_kwargs["project"])) / str(train_kwargs["name"])
if torch.cuda.is_available():
torch.cuda.reset_peak_memory_stats()
model = YOLO(MODEL_WEIGHTS)
start_time = time.time()
train_result = model.train(data=str(DATA_PATH), **train_kwargs)
elapsed_seconds = time.time() - start_time
peak_vram_mb = (
torch.cuda.max_memory_allocated() / 1024 / 1024
if torch.cuda.is_available()
else 0.0
)
result_save_dir = getattr(train_result, "save_dir", None)
save_dir = resolve_save_dir(
project_dir=Path(str(train_kwargs["project"])),
run_name=str(train_kwargs["name"]),
expected_save_dir=Path(result_save_dir) if result_save_dir else save_dir,
)
summary = extract_experiment_summary(
save_dir=save_dir,
elapsed_seconds=elapsed_seconds,
peak_vram_mb=peak_vram_mb,
data_path=DATA_PATH,
model_name=MODEL_WEIGHTS,
)
print_experiment_summary(summary)
if __name__ == "__main__":
main()

1454
uv.lock generated Normal file

File diff suppressed because it is too large Load Diff