mirror of
https://github.com/ferdzo/vesselDetection.git
synced 2026-04-04 17:56:25 +00:00
Autoresearch implementation for testing
This commit is contained in:
5
.gitignore
vendored
5
.gitignore
vendored
@@ -1,5 +1,8 @@
|
|||||||
.env
|
.env
|
||||||
.venv/
|
.venv/
|
||||||
|
__pycache__/
|
||||||
ships-aerial-images/
|
ships-aerial-images/
|
||||||
runs*/
|
runs*/
|
||||||
*.pt
|
*.pt
|
||||||
|
results.tsv
|
||||||
|
run.log
|
||||||
|
|||||||
@@ -1,305 +0,0 @@
|
|||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"id": "7f263647",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# Laboratory Exercise 5"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 1,
|
|
||||||
"id": "448199f3",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"Requirement already satisfied: ultralytics in ./.venv/lib/python3.12/site-packages (8.3.159)\n",
|
|
||||||
"Requirement already satisfied: numpy>=1.23.0 in ./.venv/lib/python3.12/site-packages (from ultralytics) (2.3.1)\n",
|
|
||||||
"Requirement already satisfied: matplotlib>=3.3.0 in ./.venv/lib/python3.12/site-packages (from ultralytics) (3.10.3)\n",
|
|
||||||
"Requirement already satisfied: opencv-python>=4.6.0 in ./.venv/lib/python3.12/site-packages (from ultralytics) (4.11.0.86)\n",
|
|
||||||
"Requirement already satisfied: pillow>=7.1.2 in ./.venv/lib/python3.12/site-packages (from ultralytics) (11.2.1)\n",
|
|
||||||
"Requirement already satisfied: pyyaml>=5.3.1 in ./.venv/lib/python3.12/site-packages (from ultralytics) (6.0.2)\n",
|
|
||||||
"Requirement already satisfied: requests>=2.23.0 in ./.venv/lib/python3.12/site-packages (from ultralytics) (2.32.4)\n",
|
|
||||||
"Requirement already satisfied: scipy>=1.4.1 in ./.venv/lib/python3.12/site-packages (from ultralytics) (1.16.0)\n",
|
|
||||||
"Requirement already satisfied: torch>=1.8.0 in ./.venv/lib/python3.12/site-packages (from ultralytics) (2.7.1)\n",
|
|
||||||
"Requirement already satisfied: torchvision>=0.9.0 in ./.venv/lib/python3.12/site-packages (from ultralytics) (0.22.1)\n",
|
|
||||||
"Requirement already satisfied: tqdm>=4.64.0 in ./.venv/lib/python3.12/site-packages (from ultralytics) (4.67.1)\n",
|
|
||||||
"Requirement already satisfied: psutil in ./.venv/lib/python3.12/site-packages (from ultralytics) (7.0.0)\n",
|
|
||||||
"Requirement already satisfied: py-cpuinfo in ./.venv/lib/python3.12/site-packages (from ultralytics) (9.0.0)\n",
|
|
||||||
"Requirement already satisfied: pandas>=1.1.4 in ./.venv/lib/python3.12/site-packages (from ultralytics) (2.3.0)\n",
|
|
||||||
"Requirement already satisfied: ultralytics-thop>=2.0.0 in ./.venv/lib/python3.12/site-packages (from ultralytics) (2.0.14)\n",
|
|
||||||
"Requirement already satisfied: contourpy>=1.0.1 in ./.venv/lib/python3.12/site-packages (from matplotlib>=3.3.0->ultralytics) (1.3.2)\n",
|
|
||||||
"Requirement already satisfied: cycler>=0.10 in ./.venv/lib/python3.12/site-packages (from matplotlib>=3.3.0->ultralytics) (0.12.1)\n",
|
|
||||||
"Requirement already satisfied: fonttools>=4.22.0 in ./.venv/lib/python3.12/site-packages (from matplotlib>=3.3.0->ultralytics) (4.58.4)\n",
|
|
||||||
"Requirement already satisfied: kiwisolver>=1.3.1 in ./.venv/lib/python3.12/site-packages (from matplotlib>=3.3.0->ultralytics) (1.4.8)\n",
|
|
||||||
"Requirement already satisfied: packaging>=20.0 in ./.venv/lib/python3.12/site-packages (from matplotlib>=3.3.0->ultralytics) (25.0)\n",
|
|
||||||
"Requirement already satisfied: pyparsing>=2.3.1 in ./.venv/lib/python3.12/site-packages (from matplotlib>=3.3.0->ultralytics) (3.2.3)\n",
|
|
||||||
"Requirement already satisfied: python-dateutil>=2.7 in ./.venv/lib/python3.12/site-packages (from matplotlib>=3.3.0->ultralytics) (2.9.0.post0)\n",
|
|
||||||
"Requirement already satisfied: pytz>=2020.1 in ./.venv/lib/python3.12/site-packages (from pandas>=1.1.4->ultralytics) (2025.2)\n",
|
|
||||||
"Requirement already satisfied: tzdata>=2022.7 in ./.venv/lib/python3.12/site-packages (from pandas>=1.1.4->ultralytics) (2025.2)\n",
|
|
||||||
"Requirement already satisfied: charset_normalizer<4,>=2 in ./.venv/lib/python3.12/site-packages (from requests>=2.23.0->ultralytics) (3.4.2)\n",
|
|
||||||
"Requirement already satisfied: idna<4,>=2.5 in ./.venv/lib/python3.12/site-packages (from requests>=2.23.0->ultralytics) (3.10)\n",
|
|
||||||
"Requirement already satisfied: urllib3<3,>=1.21.1 in ./.venv/lib/python3.12/site-packages (from requests>=2.23.0->ultralytics) (2.5.0)\n",
|
|
||||||
"Requirement already satisfied: certifi>=2017.4.17 in ./.venv/lib/python3.12/site-packages (from requests>=2.23.0->ultralytics) (2025.6.15)\n",
|
|
||||||
"Requirement already satisfied: filelock in ./.venv/lib/python3.12/site-packages (from torch>=1.8.0->ultralytics) (3.18.0)\n",
|
|
||||||
"Requirement already satisfied: typing-extensions>=4.10.0 in ./.venv/lib/python3.12/site-packages (from torch>=1.8.0->ultralytics) (4.14.0)\n",
|
|
||||||
"Requirement already satisfied: setuptools in ./.venv/lib/python3.12/site-packages (from torch>=1.8.0->ultralytics) (80.9.0)\n",
|
|
||||||
"Requirement already satisfied: sympy>=1.13.3 in ./.venv/lib/python3.12/site-packages (from torch>=1.8.0->ultralytics) (1.14.0)\n",
|
|
||||||
"Requirement already satisfied: networkx in ./.venv/lib/python3.12/site-packages (from torch>=1.8.0->ultralytics) (3.5)\n",
|
|
||||||
"Requirement already satisfied: jinja2 in ./.venv/lib/python3.12/site-packages (from torch>=1.8.0->ultralytics) (3.1.6)\n",
|
|
||||||
"Requirement already satisfied: fsspec in ./.venv/lib/python3.12/site-packages (from torch>=1.8.0->ultralytics) (2025.5.1)\n",
|
|
||||||
"Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.6.77 in ./.venv/lib/python3.12/site-packages (from torch>=1.8.0->ultralytics) (12.6.77)\n",
|
|
||||||
"Requirement already satisfied: nvidia-cuda-runtime-cu12==12.6.77 in ./.venv/lib/python3.12/site-packages (from torch>=1.8.0->ultralytics) (12.6.77)\n",
|
|
||||||
"Requirement already satisfied: nvidia-cuda-cupti-cu12==12.6.80 in ./.venv/lib/python3.12/site-packages (from torch>=1.8.0->ultralytics) (12.6.80)\n",
|
|
||||||
"Requirement already satisfied: nvidia-cudnn-cu12==9.5.1.17 in ./.venv/lib/python3.12/site-packages (from torch>=1.8.0->ultralytics) (9.5.1.17)\n",
|
|
||||||
"Requirement already satisfied: nvidia-cublas-cu12==12.6.4.1 in ./.venv/lib/python3.12/site-packages (from torch>=1.8.0->ultralytics) (12.6.4.1)\n",
|
|
||||||
"Requirement already satisfied: nvidia-cufft-cu12==11.3.0.4 in ./.venv/lib/python3.12/site-packages (from torch>=1.8.0->ultralytics) (11.3.0.4)\n",
|
|
||||||
"Requirement already satisfied: nvidia-curand-cu12==10.3.7.77 in ./.venv/lib/python3.12/site-packages (from torch>=1.8.0->ultralytics) (10.3.7.77)\n",
|
|
||||||
"Requirement already satisfied: nvidia-cusolver-cu12==11.7.1.2 in ./.venv/lib/python3.12/site-packages (from torch>=1.8.0->ultralytics) (11.7.1.2)\n",
|
|
||||||
"Requirement already satisfied: nvidia-cusparse-cu12==12.5.4.2 in ./.venv/lib/python3.12/site-packages (from torch>=1.8.0->ultralytics) (12.5.4.2)\n",
|
|
||||||
"Requirement already satisfied: nvidia-cusparselt-cu12==0.6.3 in ./.venv/lib/python3.12/site-packages (from torch>=1.8.0->ultralytics) (0.6.3)\n",
|
|
||||||
"Requirement already satisfied: nvidia-nccl-cu12==2.26.2 in ./.venv/lib/python3.12/site-packages (from torch>=1.8.0->ultralytics) (2.26.2)\n",
|
|
||||||
"Requirement already satisfied: nvidia-nvtx-cu12==12.6.77 in ./.venv/lib/python3.12/site-packages (from torch>=1.8.0->ultralytics) (12.6.77)\n",
|
|
||||||
"Requirement already satisfied: nvidia-nvjitlink-cu12==12.6.85 in ./.venv/lib/python3.12/site-packages (from torch>=1.8.0->ultralytics) (12.6.85)\n",
|
|
||||||
"Requirement already satisfied: nvidia-cufile-cu12==1.11.1.6 in ./.venv/lib/python3.12/site-packages (from torch>=1.8.0->ultralytics) (1.11.1.6)\n",
|
|
||||||
"Requirement already satisfied: triton==3.3.1 in ./.venv/lib/python3.12/site-packages (from torch>=1.8.0->ultralytics) (3.3.1)\n",
|
|
||||||
"Requirement already satisfied: six>=1.5 in ./.venv/lib/python3.12/site-packages (from python-dateutil>=2.7->matplotlib>=3.3.0->ultralytics) (1.17.0)\n",
|
|
||||||
"Requirement already satisfied: mpmath<1.4,>=1.1.0 in ./.venv/lib/python3.12/site-packages (from sympy>=1.13.3->torch>=1.8.0->ultralytics) (1.3.0)\n",
|
|
||||||
"Requirement already satisfied: MarkupSafe>=2.0 in ./.venv/lib/python3.12/site-packages (from jinja2->torch>=1.8.0->ultralytics) (3.0.2)\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"!pip install ultralytics"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"id": "41b20cd9",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Dataset\n",
|
|
||||||
"- **Source** : Kaggle\n",
|
|
||||||
"- **Format** : Images + annotations\n",
|
|
||||||
"- **Classes**: 1 (`ship`)\n",
|
|
||||||
"- **Resolution per image**: Typically 640x640\n",
|
|
||||||
"- **Dataset size**: 26900 pictures\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"id": "3f72c4c8",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"#### Importing dataset and pre-trained model"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 4,
|
|
||||||
"id": "dab37f87",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from ultralytics import YOLO\n",
|
|
||||||
"\n",
|
|
||||||
"model = YOLO(\"yolo11l.pt\")\n",
|
|
||||||
"\n",
|
|
||||||
"data_path = 'ships-aerial-images/data.yaml'"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"id": "9a15a1f5",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Final training parameterse after couple iterations "
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 5,
|
|
||||||
"id": "33e8f858",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"train_params = {\n",
|
|
||||||
" 'epochs': 40,\n",
|
|
||||||
" 'batch': 32,\n",
|
|
||||||
" 'imgsz': 640,\n",
|
|
||||||
" 'lr0': 5e-4,\n",
|
|
||||||
" 'lrf': 0.1,\n",
|
|
||||||
" 'warmup_epochs': 5,\n",
|
|
||||||
" 'warmup_bias_lr': 1e-6,\n",
|
|
||||||
" 'momentum': 0.937,\n",
|
|
||||||
" 'weight_decay': 0.0001,\n",
|
|
||||||
" 'optimizer': 'AdamW',\n",
|
|
||||||
" 'device': '0,1',\n",
|
|
||||||
" 'project': 'runs/train',\n",
|
|
||||||
" 'name': 'vessel_deteciton_v11l',\n",
|
|
||||||
" 'exist_ok': True,\n",
|
|
||||||
" 'save_period': 2,\n",
|
|
||||||
" 'workers': 8,\n",
|
|
||||||
" 'patience': 20, \n",
|
|
||||||
" 'cos_lr': True, \n",
|
|
||||||
"}\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "940aca02",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"model.train(data=data_path, **train_params)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 7,
|
|
||||||
"id": "55e7e6a4",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"model = YOLO(\"runs/train/vessel_deteciton_v11l/weights/best.pt\")"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"id": "05a6fd1f",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Validation"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 10,
|
|
||||||
"id": "cf1f9cdb",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"Ultralytics 8.3.159 🚀 Python-3.12.3 torch-2.7.1+cu126 CUDA:0 (NVIDIA GeForce RTX 3090, 24135MiB)\n",
|
|
||||||
" CUDA:1 (NVIDIA GeForce RTX 3090, 24135MiB)\n",
|
|
||||||
"\u001b[34m\u001b[1mval: \u001b[0mFast image access ✅ (ping: 0.0±0.0 ms, read: 106.4±87.7 MB/s, size: 11.8 KB)\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "stderr",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"\u001b[34m\u001b[1mval: \u001b[0mScanning /home/mlmonster/Projects/ferdzo/vesselDetection/ships-aerial-images/valid/labels.cache... 2165 images, 68 backgrounds, 0 corrupt: 100%|██████████| 2165/2165 [00:00<?, ?it/s]"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"WARNING ⚠️ Box and segment counts should be equal, but got len(segments) = 172, len(boxes) = 3720. To resolve this only boxes will be used and all segments will be removed. To avoid this please supply either a detect or segment dataset, not a detect-segment mixed dataset.\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "stderr",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"\n",
|
|
||||||
" Class Images Instances Box(P R mAP50 mAP50-95): 100%|██████████| 136/136 [00:15<00:00, 8.74it/s]\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
" all 2165 3720 0.603 0.543 0.551 0.341\n",
|
|
||||||
"Speed: 0.2ms preprocess, 6.2ms inference, 0.0ms loss, 0.2ms postprocess per image\n",
|
|
||||||
"Results saved to \u001b[1m/home/mlmonster/Projects/ferdzo/vesselDetection/runs/detect/val4\u001b[0m\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"validation = model.val(conf=0.01,iou=0.7, max_det=300, imgsz=640, device='0,1')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"id": "124cb886",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Testing the model on custom images"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 12,
|
|
||||||
"id": "43560cd1",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"\n",
|
|
||||||
"image 1/1 /home/mlmonster/Projects/ferdzo/vesselDetection/5af55.jpg: 640x640 2 ships, 20.7ms\n",
|
|
||||||
"Speed: 21.6ms preprocess, 20.7ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 640)\n",
|
|
||||||
"Predictions: tensor([[727.0521, 301.3238, 749.1639, 321.9431],\n",
|
|
||||||
" [631.3250, 203.8833, 668.7556, 220.2926]], device='cuda:0')\n",
|
|
||||||
"Confidence: tensor([0.7513, 0.3399], device='cuda:0')\n",
|
|
||||||
"Class IDs: tensor([0., 0.], device='cuda:0')\n",
|
|
||||||
"Number of detections: 2\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"test_image = \"/home/mlmonster/Projects/ferdzo/vesselDetection/5af55.jpg\"\n",
|
|
||||||
"results = model(test_image)\n",
|
|
||||||
"\n",
|
|
||||||
"for result in results:\n",
|
|
||||||
" print(f\"Predictions: {result.boxes.xyxy}\")\n",
|
|
||||||
" print(f\"Confidence: {result.boxes.conf}\")\n",
|
|
||||||
" print(f\"Class IDs: {result.boxes.cls}\")\n",
|
|
||||||
" print(f\"Number of detections: {len(result.boxes)}\") \n",
|
|
||||||
" result.save()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "6c42e373",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"model.export(format='onnx')"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": ".venv",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python3"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.12.3"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 5
|
|
||||||
}
|
|
||||||
1
.python-version
Normal file
1
.python-version
Normal file
@@ -0,0 +1 @@
|
|||||||
|
3.13
|
||||||
42
README.md
42
README.md
@@ -1,4 +1,42 @@
|
|||||||
# vesselDetection
|
# vesselDetection
|
||||||
### Ship detection using Machine Learning methods for the course Digital Processing of Image(Дигитално Процесирање на Слика)
|
|
||||||
|
|
||||||
This is a simple ship detection model, made using pre-trained YOLO with COCO weights.
|
Ship detection using YOLO for the course Digital Processing of Image (Дигитално Процесирање на Слика).
|
||||||
|
|
||||||
|
This repo now includes a lightweight `autoresearch`-style workflow adapted from `karpathy/autoresearch`: the idea is to let an AI agent iterate on `train.py`, run short fixed-budget experiments, and keep only changes that improve validation quality.
|
||||||
|
|
||||||
|
## Files that matter
|
||||||
|
|
||||||
|
- `prepare.py` - fixed utilities for dataset checks, runtime overrides, and metric extraction
|
||||||
|
- `train.py` - the single training file the agent edits
|
||||||
|
- `program.md` - instructions for the research agent
|
||||||
|
|
||||||
|
## Metric
|
||||||
|
|
||||||
|
The primary objective is `metrics/mAP50-95(B)` from Ultralytics validation results. Higher is better.
|
||||||
|
|
||||||
|
## Setup
|
||||||
|
|
||||||
|
Install dependencies with `uv`, make sure the dataset YAML exists at `ships-aerial-images/data.yaml`, then run:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
uv sync
|
||||||
|
```
|
||||||
|
|
||||||
|
## Training
|
||||||
|
|
||||||
|
Run the baseline or any experiment with:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
uv run train.py
|
||||||
|
```
|
||||||
|
|
||||||
|
By default, the training script uses a fixed 5-minute budget through the Ultralytics `time` argument and prints a compact summary at the end so an agent can compare runs automatically.
|
||||||
|
|
||||||
|
## Autoresearch loop
|
||||||
|
|
||||||
|
1. Create a fresh branch such as `autoresearch/mar24`
|
||||||
|
2. Read `program.md`
|
||||||
|
3. Run a baseline with `uv run train.py > run.log 2>&1`
|
||||||
|
4. Iterate only on `train.py`
|
||||||
|
5. Log outcomes to `results.tsv`
|
||||||
|
6. Keep only commits that improve `metrics/mAP50-95(B)`
|
||||||
|
|||||||
165
prepare.py
Normal file
165
prepare.py
Normal file
@@ -0,0 +1,165 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import csv
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
DEFAULT_DATA_PATH = Path("ships-aerial-images/data.yaml")
|
||||||
|
DEFAULT_PROJECT_DIR = Path("runs/autoresearch")
|
||||||
|
DEFAULT_TIME_HOURS = 5 / 60
|
||||||
|
PRIMARY_METRIC_KEY = "metrics/mAP50-95(B)"
|
||||||
|
|
||||||
|
|
||||||
|
def ensure_dataset_exists(data_path: Path) -> None:
|
||||||
|
if not data_path.exists():
|
||||||
|
raise FileNotFoundError(
|
||||||
|
f"Dataset config not found at '{data_path}'. Set YOLO_DATA or add the dataset before training."
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def env_bool(name: str, default: bool) -> bool:
|
||||||
|
value = os.getenv(name)
|
||||||
|
if value is None:
|
||||||
|
return default
|
||||||
|
return value.strip().lower() in {"1", "true", "yes", "on"}
|
||||||
|
|
||||||
|
|
||||||
|
def build_train_kwargs(defaults: dict[str, object]) -> dict[str, object]:
|
||||||
|
kwargs = dict(defaults)
|
||||||
|
kwargs["project"] = os.getenv("YOLO_PROJECT", str(kwargs["project"]))
|
||||||
|
kwargs["name"] = os.getenv("YOLO_RUN_NAME", str(kwargs["name"]))
|
||||||
|
kwargs["exist_ok"] = env_bool("YOLO_EXIST_OK", bool(kwargs.get("exist_ok", True)))
|
||||||
|
|
||||||
|
time_override = os.getenv("YOLO_TIME_HOURS")
|
||||||
|
if time_override:
|
||||||
|
kwargs["time"] = float(time_override)
|
||||||
|
|
||||||
|
device_override = os.getenv("YOLO_DEVICE")
|
||||||
|
if device_override:
|
||||||
|
kwargs["device"] = device_override
|
||||||
|
|
||||||
|
return kwargs
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_save_dir(
|
||||||
|
project_dir: Path, run_name: str, expected_save_dir: Path | None = None
|
||||||
|
) -> Path:
|
||||||
|
candidates: list[Path] = []
|
||||||
|
if expected_save_dir is not None:
|
||||||
|
candidates.append(expected_save_dir)
|
||||||
|
candidates.append(project_dir / run_name)
|
||||||
|
|
||||||
|
for candidate in candidates:
|
||||||
|
if (candidate / "results.csv").exists():
|
||||||
|
return candidate
|
||||||
|
|
||||||
|
matches = sorted(
|
||||||
|
(path for path in project_dir.glob(f"{run_name}*") if path.is_dir()),
|
||||||
|
key=lambda path: path.stat().st_mtime,
|
||||||
|
reverse=True,
|
||||||
|
)
|
||||||
|
for match in matches:
|
||||||
|
if (match / "results.csv").exists():
|
||||||
|
return match
|
||||||
|
|
||||||
|
return expected_save_dir or (project_dir / run_name)
|
||||||
|
|
||||||
|
|
||||||
|
def _to_float(value: str | None) -> float | None:
|
||||||
|
if value in {None, "", "nan", "None"}:
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
return float(value)
|
||||||
|
except ValueError:
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _first_float(
|
||||||
|
row: dict[str, str], keys: list[str]
|
||||||
|
) -> tuple[str | None, float | None]:
|
||||||
|
for key in keys:
|
||||||
|
if key in row:
|
||||||
|
value = _to_float(row.get(key))
|
||||||
|
if value is not None:
|
||||||
|
return key, value
|
||||||
|
return None, None
|
||||||
|
|
||||||
|
|
||||||
|
def extract_experiment_summary(
|
||||||
|
save_dir: Path,
|
||||||
|
elapsed_seconds: float,
|
||||||
|
peak_vram_mb: float,
|
||||||
|
data_path: Path,
|
||||||
|
model_name: str,
|
||||||
|
) -> dict[str, object]:
|
||||||
|
results_csv = save_dir / "results.csv"
|
||||||
|
if not results_csv.exists():
|
||||||
|
raise FileNotFoundError(f"Expected training metrics at '{results_csv}'.")
|
||||||
|
|
||||||
|
with results_csv.open("r", encoding="utf-8", newline="") as handle:
|
||||||
|
rows = list(csv.DictReader(handle))
|
||||||
|
|
||||||
|
if not rows:
|
||||||
|
raise RuntimeError(f"Training metrics file '{results_csv}' is empty.")
|
||||||
|
|
||||||
|
last_row = rows[-1]
|
||||||
|
fitness_key, fitness = _first_float(
|
||||||
|
last_row, [PRIMARY_METRIC_KEY, "metrics/mAP50(B)", "metrics/precision(B)"]
|
||||||
|
)
|
||||||
|
_, precision = _first_float(last_row, ["metrics/precision(B)"])
|
||||||
|
_, recall = _first_float(last_row, ["metrics/recall(B)"])
|
||||||
|
_, map50 = _first_float(last_row, ["metrics/mAP50(B)"])
|
||||||
|
_, map50_95 = _first_float(last_row, [PRIMARY_METRIC_KEY])
|
||||||
|
_, epoch = _first_float(last_row, ["epoch"])
|
||||||
|
|
||||||
|
best_weights = save_dir / "weights/best.pt"
|
||||||
|
last_weights = save_dir / "weights/last.pt"
|
||||||
|
|
||||||
|
return {
|
||||||
|
"fitness_key": fitness_key or PRIMARY_METRIC_KEY,
|
||||||
|
"fitness": fitness,
|
||||||
|
"precision": precision,
|
||||||
|
"recall": recall,
|
||||||
|
"map50": map50,
|
||||||
|
"map50_95": map50_95,
|
||||||
|
"epoch": epoch,
|
||||||
|
"training_seconds": elapsed_seconds,
|
||||||
|
"total_seconds": elapsed_seconds,
|
||||||
|
"peak_vram_mb": peak_vram_mb,
|
||||||
|
"data_path": str(data_path),
|
||||||
|
"model_name": model_name,
|
||||||
|
"save_dir": str(save_dir),
|
||||||
|
"results_csv": str(results_csv),
|
||||||
|
"best_weights": str(best_weights),
|
||||||
|
"best_weights_exists": best_weights.exists(),
|
||||||
|
"last_weights": str(last_weights),
|
||||||
|
"last_weights_exists": last_weights.exists(),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _format_metric(value: float | None, digits: int = 6) -> str:
|
||||||
|
if value is None:
|
||||||
|
return "n/a"
|
||||||
|
return f"{value:.{digits}f}"
|
||||||
|
|
||||||
|
|
||||||
|
def print_experiment_summary(summary: dict[str, object]) -> None:
|
||||||
|
print("---")
|
||||||
|
print(f"fitness_key: {summary['fitness_key']}")
|
||||||
|
print(f"fitness: {_format_metric(summary['fitness'])}")
|
||||||
|
print(f"training_seconds: {_format_metric(summary['training_seconds'], digits=1)}")
|
||||||
|
print(f"total_seconds: {_format_metric(summary['total_seconds'], digits=1)}")
|
||||||
|
print(f"peak_vram_mb: {_format_metric(summary['peak_vram_mb'], digits=1)}")
|
||||||
|
print(f"precision: {_format_metric(summary['precision'])}")
|
||||||
|
print(f"recall: {_format_metric(summary['recall'])}")
|
||||||
|
print(f"map50: {_format_metric(summary['map50'])}")
|
||||||
|
print(f"map50_95: {_format_metric(summary['map50_95'])}")
|
||||||
|
print(f"epoch: {_format_metric(summary['epoch'], digits=0)}")
|
||||||
|
print(f"data_path: {summary['data_path']}")
|
||||||
|
print(f"model: {summary['model_name']}")
|
||||||
|
print(f"save_dir: {summary['save_dir']}")
|
||||||
|
print(f"results_csv: {summary['results_csv']}")
|
||||||
|
print(f"best_weights: {summary['best_weights']}")
|
||||||
|
print(f"best_weights_ok: {str(summary['best_weights_exists']).lower()}")
|
||||||
|
print(f"last_weights: {summary['last_weights']}")
|
||||||
|
print(f"last_weights_ok: {str(summary['last_weights_exists']).lower()}")
|
||||||
115
program.md
Normal file
115
program.md
Normal file
@@ -0,0 +1,115 @@
|
|||||||
|
# autoresearch
|
||||||
|
|
||||||
|
This is an experiment to have the LLM do its own research.
|
||||||
|
|
||||||
|
## Setup
|
||||||
|
|
||||||
|
To set up a new experiment, work with the user to:
|
||||||
|
|
||||||
|
1. **Agree on a run tag**: propose a tag based on today's date (e.g. `mar24`). The branch `autoresearch/<tag>` must not already exist — this is a fresh run.
|
||||||
|
2. **Create the branch**: `git checkout -b autoresearch/<tag>` from current master.
|
||||||
|
3. **Read the in-scope files**: The repo is small. Read these files for full context:
|
||||||
|
- `README.md` — repository context.
|
||||||
|
- `prepare.py` — fixed runtime utilities, summary extraction, and dataset checks. Do not modify.
|
||||||
|
- `train.py` — the file you modify. Model choice, optimizer, hyperparameters, image size, and training loop entrypoint all live here.
|
||||||
|
4. **Verify data exists**: Check that `ships-aerial-images/data.yaml` exists, or that `YOLO_DATA` points to a valid dataset YAML. If not, tell the human to add the dataset first.
|
||||||
|
5. **Initialize results.tsv**: Create `results.tsv` with just the header row. The baseline will be recorded after the first run.
|
||||||
|
6. **Confirm and go**: Confirm setup looks good.
|
||||||
|
|
||||||
|
Once you get confirmation, kick off the experimentation.
|
||||||
|
|
||||||
|
## Experimentation
|
||||||
|
|
||||||
|
Each experiment runs through `uv run train.py`.
|
||||||
|
|
||||||
|
The training script uses a **fixed 5-minute time budget** through Ultralytics' `time` argument, so experiments are approximately comparable and always short enough to iterate quickly.
|
||||||
|
|
||||||
|
**What you CAN do:**
|
||||||
|
- Modify `train.py` — this is the only file you edit. Everything there is fair game: model size, model weights, image size, batch size, optimizer, learning rate schedule, augmentation knobs, worker count, freeze settings, and similar training parameters.
|
||||||
|
|
||||||
|
**What you CANNOT do:**
|
||||||
|
- Modify `prepare.py`. It is read-only.
|
||||||
|
- Install new packages or add dependencies. You can only use what's already in `pyproject.toml`.
|
||||||
|
- Modify the evaluation harness outside the normal Ultralytics validation outputs produced by the training run.
|
||||||
|
|
||||||
|
**The goal is simple: get the highest `metrics/mAP50-95(B)`.** Higher is better. Since the time budget is fixed, the core job is to find the best-performing experiment under that fixed budget.
|
||||||
|
|
||||||
|
**VRAM** is a soft constraint. Some increase is acceptable for meaningful gains, but avoid ideas that blow up memory or make experiments fragile.
|
||||||
|
|
||||||
|
**Simplicity criterion**: All else being equal, simpler is better. A tiny gain that adds ugly complexity is usually not worth it. Removing complexity while keeping equal or better quality is a win.
|
||||||
|
|
||||||
|
**The first run**: Your very first run should always be the baseline, so run the training script as is before changing anything.
|
||||||
|
|
||||||
|
## Output format
|
||||||
|
|
||||||
|
Once the script finishes it prints a summary like this:
|
||||||
|
|
||||||
|
```
|
||||||
|
---
|
||||||
|
fitness_key: metrics/mAP50-95(B)
|
||||||
|
fitness: 0.612345
|
||||||
|
training_seconds: 300.1
|
||||||
|
total_seconds: 300.1
|
||||||
|
peak_vram_mb: 8240.5
|
||||||
|
precision: 0.801234
|
||||||
|
recall: 0.745678
|
||||||
|
map50: 0.822222
|
||||||
|
map50_95: 0.612345
|
||||||
|
epoch: 18
|
||||||
|
```
|
||||||
|
|
||||||
|
You can extract the key metric from the log file with:
|
||||||
|
|
||||||
|
```
|
||||||
|
grep "^fitness:\|^peak_vram_mb:" run.log
|
||||||
|
```
|
||||||
|
|
||||||
|
## Logging results
|
||||||
|
|
||||||
|
When an experiment is done, log it to `results.tsv` (tab-separated, NOT comma-separated — commas break descriptions).
|
||||||
|
|
||||||
|
The TSV has a header row and 5 columns:
|
||||||
|
|
||||||
|
```
|
||||||
|
commit metric memory_gb status description
|
||||||
|
```
|
||||||
|
|
||||||
|
1. git commit hash (short, 7 chars)
|
||||||
|
2. `metrics/mAP50-95(B)` achieved (e.g. 0.612345) — use `0.000000` for crashes
|
||||||
|
3. peak memory in GB, round to `.1f` (divide `peak_vram_mb` by 1024) — use `0.0` for crashes
|
||||||
|
4. status: `keep`, `discard`, or `crash`
|
||||||
|
5. short text description of what the experiment tried
|
||||||
|
|
||||||
|
Example:
|
||||||
|
|
||||||
|
```
|
||||||
|
commit metric memory_gb status description
|
||||||
|
a1b2c3d 0.612345 8.1 keep baseline yolo11l 640 adamw
|
||||||
|
b2c3d4e 0.618901 9.4 keep increase image size to 768
|
||||||
|
c3d4e5f 0.605100 7.9 discard reduce batch and switch optimizer
|
||||||
|
d4e5f6g 0.000000 0.0 crash batch too large caused OOM
|
||||||
|
```
|
||||||
|
|
||||||
|
## The experiment loop
|
||||||
|
|
||||||
|
The experiment runs on a dedicated branch (e.g. `autoresearch/mar24`).
|
||||||
|
|
||||||
|
LOOP FOREVER:
|
||||||
|
|
||||||
|
1. Look at the git state: the current branch and commit.
|
||||||
|
2. Tune `train.py` with one experimental idea.
|
||||||
|
3. git commit
|
||||||
|
4. Run the experiment: `uv run train.py > run.log 2>&1`
|
||||||
|
5. Read out the results: `grep "^fitness:\|^peak_vram_mb:" run.log`
|
||||||
|
6. If the grep output is empty, the run crashed. Read the traceback from `run.log`, attempt a fix if it is easy, otherwise mark it as a crash and move on.
|
||||||
|
7. Record the result in `results.tsv` (do not commit `results.tsv`; leave it untracked).
|
||||||
|
8. If the metric improved, keep the commit.
|
||||||
|
9. If the metric is equal or worse, reset back to where you started.
|
||||||
|
|
||||||
|
The idea is that you are a completely autonomous researcher trying things out. If they work, keep. If they don't, discard. Advance the branch only with improvements.
|
||||||
|
|
||||||
|
**Timeout**: Each experiment should take about 5 minutes total, plus a small amount of overhead. If a run exceeds 10 minutes, kill it and treat it as a failure.
|
||||||
|
|
||||||
|
**Crashes**: If a run crashes (OOM, bad hyperparameters, a typo, etc.), use judgment. If it is something dumb and easy to fix, fix it and re-run. If the idea is fundamentally broken, log it as `crash` and move on.
|
||||||
|
|
||||||
|
**NEVER STOP**: Once the experiment loop has begun, do not pause to ask whether you should continue. Keep going until the human interrupts you.
|
||||||
12
pyproject.toml
Normal file
12
pyproject.toml
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
# Project metadata for the autoresearch vessel-detection experiments.
[project]
name = "vesseldetection"
version = "0.1.0"
description = "Autoresearch-style YOLO vessel detection experiments"
readme = "README.md"
requires-python = ">=3.10,<3.14"
dependencies = [
    "ultralytics>=8.3.0",
]

# Run as a plain script collection; uv should not build/install a package.
[tool.uv]
package = false
|
||||||
BIN
results_test.jpg
BIN
results_test.jpg
Binary file not shown.
|
Before Width: | Height: | Size: 336 KiB |
105
train.py
105
train.py
@@ -1,28 +1,89 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import torch
|
||||||
from ultralytics import YOLO
|
from ultralytics import YOLO
|
||||||
|
|
||||||
model = YOLO("yolo11l.pt")
|
from prepare import (
|
||||||
|
DEFAULT_DATA_PATH,
|
||||||
|
DEFAULT_PROJECT_DIR,
|
||||||
|
DEFAULT_TIME_HOURS,
|
||||||
|
build_train_kwargs,
|
||||||
|
ensure_dataset_exists,
|
||||||
|
extract_experiment_summary,
|
||||||
|
print_experiment_summary,
|
||||||
|
resolve_save_dir,
|
||||||
|
)
|
||||||
|
|
||||||
data_path = 'ships-aerial-images/data.yaml'
|
# The agent is expected to iterate on this file only.
|
||||||
|
MODEL_WEIGHTS = os.getenv("YOLO_MODEL", "yolo11l.pt")
|
||||||
|
DATA_PATH = Path(os.getenv("YOLO_DATA", str(DEFAULT_DATA_PATH)))
|
||||||
|
RUN_NAME = "vessel_detection_yolo11l"
|
||||||
|
|
||||||
train_params = {
|
TRAIN_PARAMS = {
|
||||||
'epochs': 40,
|
"epochs": 40,
|
||||||
'batch': 32,
|
"time": DEFAULT_TIME_HOURS,
|
||||||
'imgsz': 640,
|
"batch": 32,
|
||||||
'lr0': 5e-4,
|
"imgsz": 640,
|
||||||
'lrf': 0.1,
|
"lr0": 5e-4,
|
||||||
'warmup_epochs': 5,
|
"lrf": 0.1,
|
||||||
'warmup_bias_lr': 1e-6,
|
"warmup_epochs": 5,
|
||||||
'momentum': 0.937,
|
"warmup_bias_lr": 1e-6,
|
||||||
'weight_decay': 0.0001,
|
"momentum": 0.937,
|
||||||
'optimizer': 'AdamW',
|
"weight_decay": 1e-4,
|
||||||
'device': '0,1',
|
"optimizer": "AdamW",
|
||||||
'project': 'runs/train',
|
"device": "0",
|
||||||
'name': 'vessel_deteciton_v11l',
|
"project": str(DEFAULT_PROJECT_DIR),
|
||||||
'exist_ok': True,
|
"name": RUN_NAME,
|
||||||
'save_period': 2,
|
"exist_ok": True,
|
||||||
'workers': 8,
|
"save_period": 2,
|
||||||
'patience': 20,
|
"workers": 8,
|
||||||
'cos_lr': True,
|
"patience": 20,
|
||||||
|
"cos_lr": True,
|
||||||
|
"seed": 42,
|
||||||
|
"deterministic": True,
|
||||||
|
"plots": False,
|
||||||
}
|
}
|
||||||
|
|
||||||
model.train(data=data_path, **train_params)
|
|
||||||
|
def main() -> None:
    """Run one YOLO training experiment and print a machine-readable summary.

    Trains with the module-level MODEL_WEIGHTS / DATA_PATH / TRAIN_PARAMS,
    then prints a summary (via print_experiment_summary) so the outer
    experiment loop can grep fitness/VRAM figures out of run.log.
    """
    ensure_dataset_exists(DATA_PATH)

    train_kwargs = build_train_kwargs(TRAIN_PARAMS)
    # Fallback save dir derived from project/name, used only if the
    # trainer does not report one itself.
    save_dir = Path(str(train_kwargs["project"])) / str(train_kwargs["name"])

    if torch.cuda.is_available():
        # Reset so the peak-VRAM figure reflects this run only.
        torch.cuda.reset_peak_memory_stats()

    model = YOLO(MODEL_WEIGHTS)

    start_time = time.time()
    train_result = model.train(data=str(DATA_PATH), **train_kwargs)
    elapsed_seconds = time.time() - start_time
    peak_vram_mb = (
        torch.cuda.max_memory_allocated() / 1024 / 1024
        if torch.cuda.is_available()
        else 0.0
    )

    # Prefer the save dir reported by the trainer; resolve_save_dir
    # presumably reconciles it with the expected project/name layout —
    # confirm against prepare.py.
    result_save_dir = getattr(train_result, "save_dir", None)
    save_dir = resolve_save_dir(
        project_dir=Path(str(train_kwargs["project"])),
        run_name=str(train_kwargs["name"]),
        expected_save_dir=Path(result_save_dir) if result_save_dir else save_dir,
    )

    summary = extract_experiment_summary(
        save_dir=save_dir,
        elapsed_seconds=elapsed_seconds,
        peak_vram_mb=peak_vram_mb,
        data_path=DATA_PATH,
        model_name=MODEL_WEIGHTS,
    )
    print_experiment_summary(summary)


if __name__ == "__main__":
    main()
|
||||||
|
|||||||
Reference in New Issue
Block a user