diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..74c6dfe --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +.env +.vscode/ \ No newline at end of file diff --git a/blobs/4a/c6/4ac692e9419ac00b30ba1c179f95e5c162e9637a38474a03a5e8d9f59aec83f5 b/blobs/4a/c6/4ac692e9419ac00b30ba1c179f95e5c162e9637a38474a03a5e8d9f59aec83f5 new file mode 100644 index 0000000..54c5062 Binary files /dev/null and b/blobs/4a/c6/4ac692e9419ac00b30ba1c179f95e5c162e9637a38474a03a5e8d9f59aec83f5 differ diff --git a/blobs/9a/4e/9a4e14659febe633544b4dec31e617af0c72a8630daadc637867f3f9fd749b12 b/blobs/9a/4e/9a4e14659febe633544b4dec31e617af0c72a8630daadc637867f3f9fd749b12 new file mode 100644 index 0000000..e946674 Binary files /dev/null and b/blobs/9a/4e/9a4e14659febe633544b4dec31e617af0c72a8630daadc637867f3f9fd749b12 differ diff --git a/data/chunking.go b/data/chunking.go new file mode 100644 index 0000000..c28d9ac --- /dev/null +++ b/data/chunking.go @@ -0,0 +1,97 @@ +package data + +import ( + "crypto/md5" + "crypto/sha256" + "encoding/hex" + "fmt" + "fs/models" + "io" + "os" + "path/filepath" +) + +const chunkSize = 64 * 1024 +const blobRoot = "blobs/" + +func IngestStream(bucket, key, contentType string, stream io.Reader) (*models.ObjectManifest, error) { + manifest := &models.ObjectManifest{ + Bucket: bucket, + Key: key, + ContentType: contentType, + } + + fullFileHasher := md5.New() + + buffer := make([]byte, chunkSize) + var totalSize int64 + + for { + bytesRead, err := io.ReadFull(stream, buffer) + if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF { + return nil, err + } + + if bytesRead > 0 { + chunkData := buffer[:bytesRead] + totalSize += int64(bytesRead) + + fullFileHasher.Write(chunkData) + + chunkHash := sha256.Sum256(chunkData) + chunkID := hex.EncodeToString(chunkHash[:]) + + err := saveBlob(chunkID, chunkData) + if err != nil { + return nil, err + } + manifest.Chunks = append(manifest.Chunks, chunkID) + } + if err == io.EOF || err == io.ErrUnexpectedEOF { + break + } + if err != nil { + return nil, err + } + + } + + manifest.Size = totalSize + manifest.ETag = fmt.Sprintf(`"%s"`, hex.EncodeToString(fullFileHasher.Sum(nil))) + + return manifest, nil + +} + +func saveBlob(chunkID string, data []byte) error { + dir := filepath.Join(blobRoot, chunkID[:2], chunkID[2:4]) + if err := os.MkdirAll(dir, 0755); err != nil { + return err + } + + fullPath := filepath.Join(dir, chunkID) + if _, err := os.Stat(fullPath); os.IsNotExist(err) { + if err := os.WriteFile(fullPath, data, 0644); err != nil { + return err + } + } + + return nil +} + +func GetBlob(chunkID string) ([]byte, error) { + + return os.ReadFile(filepath.Join(blobRoot, chunkID[:2], chunkID[2:4], chunkID)) +} + +func GetObject(manifest *models.ObjectManifest) ([]byte, error) { + var fullData []byte + for _, chunkID := range manifest.Chunks { + chunkData, err := GetBlob(chunkID) + if err != nil { + return nil, err + } + fullData = append(fullData, chunkData...) + } + return fullData, nil +} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..81e9a66 --- /dev/null +++ b/go.mod @@ -0,0 +1,10 @@ +module fs + +go 1.25.7 + +require ( + github.com/klauspost/cpuid/v2 v2.3.0 // indirect + github.com/klauspost/reedsolomon v1.13.2 // indirect + go.etcd.io/bbolt v1.4.3 // indirect + golang.org/x/sys v0.41.0 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..b54a6cc --- /dev/null +++ b/go.sum @@ -0,0 +1,10 @@ +github.com/klauspost/cpuid/v2 v2.3.0 h1:S4CRMLnYUhGeDFDqkGriYKdfoFlDnMtqTiI/sFzhA9Y= +github.com/klauspost/cpuid/v2 v2.3.0/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0= +github.com/klauspost/reedsolomon v1.13.2 h1:9qtQy2tKEVpVB8Pfq87ZljHZb60/LbeTQ1OxV8EGzdE= +github.com/klauspost/reedsolomon v1.13.2/go.mod h1:ggJT9lc71Vu+cSOPBlxGvBN6TfAS77qB4fp8vJ05NSA= +go.etcd.io/bbolt v1.4.3 h1:dEadXpI6G79deX5prL3QRNP6JB8UxVkqo4UPnHaNXJo= +go.etcd.io/bbolt v1.4.3/go.mod h1:tKQlpPaYCVFctUIgFKFnAlvbmB3tpy1vkTnDWohtc0E= +golang.org/x/sys v0.29.0 h1:TPYlXGxvx1MGTn2GiZDhnjPA9wZzZeGKHHmKhHYvgaU= +golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.41.0 h1:Ivj+2Cp/ylzLiEU89QhWblYnOE9zerudt9Ftecq2C6k= +golang.org/x/sys v0.41.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= diff --git a/main.go b/main.go new file mode 100644 index 0000000..81b6411 --- /dev/null +++ b/main.go @@ -0,0 +1,40 @@ +package main + +import ( + "fmt" + "os" + + "fs/data" +) + +func main() { + fmt.Println("Hello, World!") + imageStream, err := os.Open("fer.jpg") + if err != nil { + fmt.Printf("Error opening image stream: %v\n", err) + return + } + defer imageStream.Close() + + fmt.Fprint(imageStream) + + manifest, err := data.IngestStream("test-bucket-ferdzo", "fer.jpg", "image/jpeg", imageStream) + if err != nil { + fmt.Printf("Error ingesting stream: %v\n", err) + return + } + fmt.Printf("Manifest: %+v\n", manifest) + + objectData, err := data.GetObject(manifest) + if err != nil { + fmt.Printf("Error retrieving object: %v\n", err) + return + } + fmt.Printf("Retrieved object data length: %d\n", len(objectData)) + + err = os.WriteFile("recovered"+manifest.Key, objectData, 0644) + if err != nil { + fmt.Printf("Error writing recovered file: %v\n", err) + return + } +} diff --git a/models/models.go b/models/models.go new file mode 100644 index 0000000..d176d88 --- /dev/null +++ b/models/models.go @@ -0,0 +1,11 @@ +package models + +type ObjectManifest struct { + Bucket string `json:"bucket"` + Key string `json:"key"` + Size int64 `json:"size"` + ContentType string `json:"content_type"` + ETag string `json:"etag"` + Chunks []string `json:"chunks"` + CreatedAt int64 `json:"created_at"` +} diff --git a/my.db b/my.db new file mode 100644 index 0000000..6147f2a Binary files /dev/null and b/my.db differ