Cleanup and rename

This commit is contained in:
2026-03-13 00:01:14 +01:00
parent c90be4a981
commit 3882e3b0a6
5 changed files with 75 additions and 57 deletions

2
.gitignore vendored
View File

@@ -1,5 +1,5 @@
node_modules/ node_modules/
.env .env
dist/ dist/
data/*.parquet data/**/*.parquet
*.log *.log

View File

@@ -5,8 +5,8 @@ Real-time Skopje public transport tracking with Bun, GTFS/GTFS-RT ingestion, par
## What Is In This Repo ## What Is In This Repo
- `bus-tracker-json.ts`: terminal tracker for one stop + one route. - `bus-tracker-json.ts`: terminal tracker for one stop + one route.
- `background-tracker.ts`: continuous collector for multiple routes/stops. - `index.ts`: continuous collector for multiple routes/stops.
- `lib/database.ts`: parquet write layer with rolling segments and optional S3 upload. - `lib/storage.ts`: parquet write layer with rolling segments and optional S3 upload.
- `lib/gtfs.ts`: GTFS CSV loading helpers. - `lib/gtfs.ts`: GTFS CSV loading helpers.
- `config.ts`: API base URL, defaults, and tracker timing. - `config.ts`: API base URL, defaults, and tracker timing.
@@ -25,19 +25,19 @@ bun run typecheck
Run single stop/route terminal tracker: Run single stop/route terminal tracker:
```bash ```bash
bun run tracker bun run bus-tracker-json.ts
``` ```
Run with custom stop and route IDs: Run with custom stop and route IDs:
```bash ```bash
bun run tracker -- --stop 1571 --route 125 bun run bus-tracker-json.ts --stop 1571 --route 125
``` ```
Run background collection pipeline: Run background collection pipeline:
```bash ```bash
bun run track bun run start
``` ```
## Environment ## Environment
@@ -58,13 +58,11 @@ Key variables:
## Scripts ## Scripts
- `bun run start`: alias for the terminal tracker. - `bun run start`: collector entrypoint (`index.ts`).
- `bun run tracker`: terminal tracker.
- `bun run track`: background collector.
- `bun run typecheck`: TypeScript no-emit check. - `bun run typecheck`: TypeScript no-emit check.
## Notes ## Notes
- Generated parquet files are intentionally ignored by git (`data/*.parquet`). - Generated parquet files are intentionally ignored by git (`data/**/*.parquet`).
- The background tracker rotates segments and uploads each closed segment when S3 is enabled. - The collector rotates segments and uploads each closed segment when S3 is enabled.
- On process shutdown (`SIGINT`/`SIGTERM`), writers are flushed so the current segment is finalized. - On process shutdown (`SIGINT`/`SIGTERM`), writers are flushed so the current segment is finalized.

View File

@@ -1,21 +1,15 @@
#!/usr/bin/env bun
/**
* Background tracker for popular bus routes in Skopje
* Continuously monitors GTFS-RT feeds and stores data as parquet segments
*/
import GtfsRealtimeBindings from 'gtfs-realtime-bindings'; import GtfsRealtimeBindings from 'gtfs-realtime-bindings';
import { config } from './config'; import { config } from './config';
import { GtfsRoute, GtfsStop, loadGtfsRoutes, loadGtfsStops } from './lib/gtfs'; import { GtfsRoute, GtfsStop, loadGtfsRoutes, loadGtfsStops } from './lib/gtfs';
import { import {
initDatabase, initStorage,
logVehiclePositions, logVehiclePositions,
logVehicleFeedSnapshot, logVehicleFeedSnapshot,
logArrival, logArrival,
closeDatabase, closeStorage,
VehicleFeedSnapshot, VehicleFeedSnapshot,
VehiclePosition VehiclePosition
} from './lib/database'; } from './lib/storage';
// Popular routes to track // Popular routes to track
const TRACKED_ROUTES = [ const TRACKED_ROUTES = [
@@ -53,6 +47,7 @@ const REFRESH_INTERVAL = 30000; // 30 seconds
const ARRIVAL_STOP_CAP = 150; // Max stops to query per cycle const ARRIVAL_STOP_CAP = 150; // Max stops to query per cycle
const SAVE_ALL_VEHICLE_SNAPSHOTS = (process.env.SAVE_ALL_VEHICLE_SNAPSHOTS ?? 'true').toLowerCase() === 'true'; const SAVE_ALL_VEHICLE_SNAPSHOTS = (process.env.SAVE_ALL_VEHICLE_SNAPSHOTS ?? 'true').toLowerCase() === 'true';
const SAVE_ALL_VEHICLE_POSITIONS = (process.env.SAVE_ALL_VEHICLE_POSITIONS ?? 'true').toLowerCase() === 'true'; const SAVE_ALL_VEHICLE_POSITIONS = (process.env.SAVE_ALL_VEHICLE_POSITIONS ?? 'true').toLowerCase() === 'true';
const VERBOSE_TRACKER_LOGS = (process.env.VERBOSE_TRACKER_LOGS ?? 'false').toLowerCase() === 'true';
let stats = { let stats = {
cycles: 0, cycles: 0,
@@ -109,7 +104,9 @@ async function trackVehicles() {
} }
const allVehicles = await vehiclesResponse.json() as any[]; const allVehicles = await vehiclesResponse.json() as any[];
if (VERBOSE_TRACKER_LOGS) {
console.log(` Found ${allVehicles.length} total vehicles`); console.log(` Found ${allVehicles.length} total vehicles`);
}
if (SAVE_ALL_VEHICLE_SNAPSHOTS && allVehicles.length > 0) { if (SAVE_ALL_VEHICLE_SNAPSHOTS && allVehicles.length > 0) {
const captureTime = Date.now(); const captureTime = Date.now();
@@ -187,19 +184,8 @@ async function trackVehicles() {
} }
} }
if (VERBOSE_TRACKER_LOGS) {
console.log(` Matched ${vehicleRouteMap.size} vehicles to GTFS routes`); console.log(` Matched ${vehicleRouteMap.size} vehicles to GTFS routes`);
// Debug: Show sample vehicle IDs from both sources
if (vehicleRouteMap.size > 0) {
const sampleGtfsIds = Array.from(vehicleRouteMap.keys()).slice(0, 5);
console.log(` Sample GTFS-RT vehicle IDs: ${sampleGtfsIds.join(', ')}`);
}
if (allVehicles.length > 0) {
const sampleJsonIds = allVehicles.slice(0, 5).map(v =>
`${v.identificationNumber || v.inventoryNumber || 'unknown'}`
);
console.log(` Sample JSON API vehicle IDs: ${sampleJsonIds.join(', ')}`);
} }
// Prepare vehicle positions. // Prepare vehicle positions.
@@ -231,7 +217,7 @@ async function trackVehicles() {
}); });
} }
// Log to database // Persist current cycle positions
if (positions.length > 0) { if (positions.length > 0) {
await logVehiclePositions(positions); await logVehiclePositions(positions);
console.log(` [OK] Logged ${positions.length} vehicle positions${SAVE_ALL_VEHICLE_POSITIONS ? ' (all vehicles mode)' : ''}`); console.log(` [OK] Logged ${positions.length} vehicle positions${SAVE_ALL_VEHICLE_POSITIONS ? ' (all vehicles mode)' : ''}`);
@@ -350,10 +336,10 @@ async function trackArrivals() {
console.log(` [OK] Logged ${arrivalsLogged} new arrivals (${duplicates} duplicates skipped, ${arrivalsFound} total found)`); console.log(` [OK] Logged ${arrivalsLogged} new arrivals (${duplicates} duplicates skipped, ${arrivalsFound} total found)`);
stats.arrivalsLogged += arrivalsLogged; stats.arrivalsLogged += arrivalsLogged;
} else { } else {
console.log(` [INFO] Found ${arrivalsFound} arrivals but all were duplicates (already in database)`); console.log(` [INFO] Found ${arrivalsFound} arrivals but all were duplicates (already recorded)`);
} }
if (matchedRouteCounts.size > 0) { if (VERBOSE_TRACKER_LOGS && matchedRouteCounts.size > 0) {
const matchedSummary = Array.from(matchedRouteCounts.entries()) const matchedSummary = Array.from(matchedRouteCounts.entries())
.sort((a, b) => b[1] - a[1]) .sort((a, b) => b[1] - a[1])
.slice(0, 10) .slice(0, 10)
@@ -362,7 +348,7 @@ async function trackArrivals() {
console.log(` [DEBUG] Matched route IDs: ${matchedSummary}`); console.log(` [DEBUG] Matched route IDs: ${matchedSummary}`);
} }
if (unmatchedRoutes.size > 0) { if (VERBOSE_TRACKER_LOGS && unmatchedRoutes.size > 0) {
const topUnmatched = Array.from(unmatchedRoutes.entries()) const topUnmatched = Array.from(unmatchedRoutes.entries())
.sort((a, b) => b[1] - a[1]) .sort((a, b) => b[1] - a[1])
.slice(0, 8) .slice(0, 8)
@@ -399,11 +385,11 @@ function printStats() {
async function main() { async function main() {
console.log('\nStarting Background Bus Tracker for Popular Routes & Stops\n'); console.log('\nStarting Background Bus Tracker for Popular Routes & Stops\n');
// Initialize database // Initialize storage
try { try {
await initDatabase(); await initStorage();
} catch (error) { } catch (error) {
console.error('Failed to initialize database:', error); console.error('Failed to initialize storage:', error);
console.log('Continuing without data logging...'); console.log('Continuing without data logging...');
} }
@@ -429,14 +415,14 @@ async function main() {
process.on('SIGINT', async () => { process.on('SIGINT', async () => {
console.log('\n\nShutting down tracker...'); console.log('\n\nShutting down tracker...');
printStats(); printStats();
await closeDatabase(); await closeStorage();
process.exit(0); process.exit(0);
}); });
process.on('SIGTERM', async () => { process.on('SIGTERM', async () => {
console.log('\n\nReceived SIGTERM, closing tracker...'); console.log('\n\nReceived SIGTERM, closing tracker...');
printStats(); printStats();
await closeDatabase(); await closeStorage();
process.exit(0); process.exit(0);
}); });
} }

View File

@@ -170,22 +170,41 @@ function segmentId(): string {
return new Date().toISOString().replace(/[.:]/g, '-'); return new Date().toISOString().replace(/[.:]/g, '-');
} }
/**
 * Map a segment id to a Hive-style partition path, e.g.
 * `year=2026/month=03/day=13`.
 *
 * Segment ids are derived from ISO timestamps (see `segmentId()`), so the
 * leading `YYYY-MM-DDT` prefix is parsed when present; otherwise the current
 * UTC date is used as a fallback partition.
 */
function partitionPathFromSegmentId(id: string): string {
  const parsed = /^(\d{4})-(\d{2})-(\d{2})T/.exec(id);
  let year: string;
  let month: string;
  let day: string;
  if (parsed) {
    [, year, month, day] = parsed;
  } else {
    // Id did not carry a date prefix — partition under today's UTC date.
    const today = new Date();
    year = String(today.getUTCFullYear());
    month = String(today.getUTCMonth() + 1).padStart(2, '0');
    day = String(today.getUTCDate()).padStart(2, '0');
  }
  return path.join(`year=${year}`, `month=${month}`, `day=${day}`);
}
/**
 * Build the full parquet file path for a segment: the file lives under
 * DATA_DIR inside its date-partition directory and is named
 * `<prefix>-<segment id>.parquet`.
 */
function buildPartitionedFilePath(prefix: string, id: string): string {
  const partitionDir = partitionPathFromSegmentId(id);
  const fileName = `${prefix}-${id}.parquet`;
  return path.join(DATA_DIR, partitionDir, fileName);
}
function buildVehicleFile(id: string): string { function buildVehicleFile(id: string): string {
return path.join(DATA_DIR, `vehicle_positions-${id}.parquet`); return buildPartitionedFilePath('vehicle_positions', id);
} }
function buildArrivalFile(id: string): string { function buildArrivalFile(id: string): string {
return path.join(DATA_DIR, `arrival_records-${id}.parquet`); return buildPartitionedFilePath('arrival_records', id);
} }
function buildSnapshotFile(id: string): string { function buildSnapshotFile(id: string): string {
return path.join(DATA_DIR, `vehicle_snapshots-${id}.parquet`); return buildPartitionedFilePath('vehicle_snapshots', id);
} }
function createFileWriter(filename: string): Writer { function createFileWriter(filename: string): Writer {
const writer = new ByteWriter() as unknown as Writer & { index: number }; const writer = new ByteWriter() as unknown as Writer & { index: number };
const chunkSize = 1_000_000; const chunkSize = 1_000_000;
fsSync.mkdirSync(path.dirname(filename), { recursive: true });
fsSync.writeFileSync(filename, '', { flag: 'w' }); fsSync.writeFileSync(filename, '', { flag: 'w' });
const flush = () => { const flush = () => {
@@ -299,7 +318,12 @@ async function uploadFileToObjectStorage(filePath: string): Promise<boolean> {
} }
const keyPrefix = OBJECT_STORAGE_PREFIX ? `${OBJECT_STORAGE_PREFIX}/` : ''; const keyPrefix = OBJECT_STORAGE_PREFIX ? `${OBJECT_STORAGE_PREFIX}/` : '';
const key = `${keyPrefix}${path.basename(filePath)}`; const relativePath = path.relative(DATA_DIR, filePath);
const normalizedRelativePath =
!relativePath.startsWith('..') && !path.isAbsolute(relativePath)
? relativePath.split(path.sep).join('/')
: path.basename(filePath);
const key = `${keyPrefix}${normalizedRelativePath}`;
const body = await fs.readFile(filePath); const body = await fs.readFile(filePath);
@@ -448,10 +472,14 @@ async function ensureInitialized(): Promise<void> {
console.log(`[OK] Hyparquet storage ready at ${DATA_DIR}, roll=${ROLL_INTERVAL_MS / 60000}m${storageInfo}`); console.log(`[OK] Hyparquet storage ready at ${DATA_DIR}, roll=${ROLL_INTERVAL_MS / 60000}m${storageInfo}`);
} }
export async function initDatabase(): Promise<void> { export async function initStorage(): Promise<void> {
await ensureInitialized(); await ensureInitialized();
} }
/**
 * Delegating alias for `initStorage`, kept so existing callers of the old
 * database-named API keep working after the storage-layer rename.
 * Prefer `initStorage` in new code.
 */
export async function initDatabase(): Promise<void> {
  await initStorage();
}
export async function logVehiclePosition(position: VehiclePosition): Promise<void> { export async function logVehiclePosition(position: VehiclePosition): Promise<void> {
await logVehiclePositions([position]); await logVehiclePositions([position]);
} }
@@ -566,7 +594,7 @@ export async function cleanupOldData(_daysToKeep: number = 90): Promise<void> {
console.log('cleanupOldData skipped: parquet segment mode'); console.log('cleanupOldData skipped: parquet segment mode');
} }
export async function getDatabaseStats() { export async function getStorageStats() {
await ensureInitialized(); await ensureInitialized();
const [vehicleInfo, arrivalInfo, snapshotInfo] = await Promise.all([ const [vehicleInfo, arrivalInfo, snapshotInfo] = await Promise.all([
@@ -581,9 +609,9 @@ export async function getDatabaseStats() {
vehicleSnapshots: writes.vehicleSnapshots, vehicleSnapshots: writes.vehicleSnapshots,
oldestRecord: null, oldestRecord: null,
newestRecord: null, newestRecord: null,
dbType: 'hyparquet(rolling-write)', storageType: 'hyparquet(rolling-write)',
host: OBJECT_STORAGE_ENABLED ? 'object-storage+local' : 'local-filesystem', host: OBJECT_STORAGE_ENABLED ? 'object-storage+local' : 'local-filesystem',
database: DATA_DIR, storagePath: DATA_DIR,
rolling: { rolling: {
minutes: ROLL_INTERVAL_MS / 60000, minutes: ROLL_INTERVAL_MS / 60000,
currentSegmentId, currentSegmentId,
@@ -616,7 +644,11 @@ export async function getDatabaseStats() {
}; };
} }
export async function closeDatabase(): Promise<void> { export async function getDatabaseStats() {
return getStorageStats();
}
export async function closeStorage(): Promise<void> {
if (!initialized) { if (!initialized) {
return; return;
} }
@@ -630,3 +662,7 @@ export async function closeDatabase(): Promise<void> {
await enqueueRotation('shutdown', false); await enqueueRotation('shutdown', false);
initialized = false; initialized = false;
} }
/**
 * Delegating alias for `closeStorage`, kept so existing callers of the old
 * database-named API keep working after the storage-layer rename.
 * Prefer `closeStorage` in new code.
 */
export async function closeDatabase(): Promise<void> {
  await closeStorage();
}

View File

@@ -1,12 +1,10 @@
{ {
"name": "skopje-bus-tracker", "name": "openjsp",
"version": "1.0.0", "version": "1.0.0",
"description": "Real-time bus tracking for Skopje public transport", "description": "Real-time bus tracking for Skopje public transport",
"main": "server.ts", "main": "index.ts",
"scripts": { "scripts": {
"start": "bun run bus-tracker-json.ts", "start": "bun run index.ts",
"tracker": "bun run bus-tracker-json.ts",
"track": "bun run background-tracker.ts",
"typecheck": "bunx tsc --noEmit" "typecheck": "bunx tsc --noEmit"
}, },
"keywords": [ "keywords": [