package storage

import (
	"errors"
	"io"
	"io/fs"
	"os"
	"path/filepath"
	"strings"
	"time"
)

// FSStore stores blobs on the local filesystem under root/objects/...
// It supports both a flat layout (objects/<hash>) and a nested layout
// (objects/<hash>/<file> or objects/<prefix>/<hash>).
type FSStore struct {
	root    string
	objects string
}

// NewFS returns a file-backed blob store rooted at dir.
func NewFS(dir string) (*FSStore, error) {
	if dir == "" {
		return nil, errors.New("empty storage dir")
	}
	o := filepath.Join(dir, "objects")
	if err := os.MkdirAll(o, 0o755); err != nil {
		return nil, err
	}
	return &FSStore{root: dir, objects: o}, nil
}
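
// Illustrative usage sketch (not part of the original API surface; the
// directory path below is hypothetical):
//
//	store, err := NewFS("/var/lib/myapp/blobs")
//	if err != nil {
//		// handle the error
//	}
//	// NewFS has created /var/lib/myapp/blobs/objects/ and the store is
//	// ready for Put/Get/Delete/Walk calls.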

// pathFlat returns the flat path objects/<hash>.
func (s *FSStore) pathFlat(hash string) (string, error) {
	if hash == "" {
		return "", errors.New("empty hash")
	}
	return filepath.Join(s.objects, hash), nil
}

// isHexHash does a quick check for lowercase hex of length 64.
func isHexHash(name string) bool {
	if len(name) != 64 {
		return false
	}
	for i := 0; i < 64; i++ {
		c := name[i]
		if !((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f')) {
			return false
		}
	}
	return true
}

// findBlobPath tries common layouts before falling back to a recursive search.
//
// Supported fast paths (in order):
//  1. objects/<hash> (flat file)
//  2. objects/<hash>/blob|data|content (common names)
//  3. objects/<hash>/<single file> (folder-per-post; pick that file)
//  4. objects/<hash[0:2]>/<hash> (two-level prefix sharding)
//
// If still not found, it walks recursively under objects/ to locate either:
//   - a file named exactly <hash>, or
//   - any file under a directory named <hash> (choosing the most recently modified).
func (s *FSStore) findBlobPath(hash string) (string, error) {
	if hash == "" {
		return "", errors.New("empty hash")
	}

	// 1) flat file
	if p, _ := s.pathFlat(hash); fileExists(p) {
		return p, nil
	}

	// 2) objects/<hash>/{blob,data,content}
	dir := filepath.Join(s.objects, hash)
	for _, cand := range []string{"blob", "data", "content"} {
		p := filepath.Join(dir, cand)
		if fileExists(p) {
			return p, nil
		}
	}

	// 3) objects/<hash>/<single file>
	if st, err := os.Stat(dir); err == nil && st.IsDir() {
		ents, err := os.ReadDir(dir)
		if err == nil {
			var picked string
			var pickedMod time.Time
			for _, de := range ents {
				if de.IsDir() {
					continue
				}
				p := filepath.Join(dir, de.Name())
				fi, err := os.Stat(p)
				if err != nil || !fi.Mode().IsRegular() {
					continue
				}
				// Pick the newest file if there are several.
				if picked == "" || fi.ModTime().After(pickedMod) {
					picked = p
					pickedMod = fi.ModTime()
				}
			}
			if picked != "" {
				return picked, nil
			}
		}
	}

	// 4) two-level prefix: objects/aa/<hash>
	if len(hash) >= 2 {
		p := filepath.Join(s.objects, hash[:2], hash)
		if fileExists(p) {
			return p, nil
		}
	}

	// Fallback: recursive search.
	var best string
	var bestMod time.Time

	err := filepath.WalkDir(s.objects, func(p string, d fs.DirEntry, err error) error {
		if err != nil {
			// Ignore per-entry errors.
			return nil
		}
		if d.IsDir() {
			return nil
		}
		base := filepath.Base(p)
		// Exact filename == hash: good enough, stop the entire walk
		// (fs.SkipAll, Go 1.20+; fs.SkipDir would only skip the rest of the
		// current directory and let a later nested match override this one).
		if base == hash {
			best = p
			return fs.SkipAll
		}
		// If the parent directory is named after the hash, consider this file.
		parent := filepath.Base(filepath.Dir(p))
		if parent == hash {
			if fi, err := os.Stat(p); err == nil && fi.Mode().IsRegular() {
				if best == "" || fi.ModTime().After(bestMod) {
					best = p
					bestMod = fi.ModTime()
				}
			}
		}
		return nil
	})
	if err == nil && best != "" {
		return best, nil
	}

	return "", os.ErrNotExist
}
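
// For illustration only (the hash is hypothetical and abbreviated), the lookup
// above resolves any of these on-disk shapes to the same blob:
//
//	objects/3f2a…e1            (flat)
//	objects/3f2a…e1/blob       (common nested name)
//	objects/3f2a…e1/post.md    (folder-per-post, single file)
//	objects/3f/3f2a…e1         (two-level prefix sharding)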

// fileExists reports whether p exists and is a regular file.
func fileExists(p string) bool {
	fi, err := os.Stat(p)
	return err == nil && fi.Mode().IsRegular()
}

// Put writes/overwrites the blob at the content hash into the flat path.
// (Nested layouts remain supported for reads/reindex, but new writes are flat.)
func (s *FSStore) Put(hash string, r io.Reader) error {
	p, err := s.pathFlat(hash)
	if err != nil {
		return err
	}
	if err := os.MkdirAll(filepath.Dir(p), 0o755); err != nil {
		return err
	}
	// Write to a temporary file first, then rename into place so a partial
	// write never shows up under the final name.
	tmp := p + ".tmp"
	f, err := os.Create(tmp)
	if err != nil {
		return err
	}
	_, werr := io.Copy(f, r)
	cerr := f.Close()
	if werr != nil {
		_ = os.Remove(tmp)
		return werr
	}
	if cerr != nil {
		_ = os.Remove(tmp)
		return cerr
	}
	return os.Rename(tmp, p)
}
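
// Round-trip sketch (illustrative, not part of this file): the store treats
// hash as an opaque 64-char key; hashing the payload with SHA-256 hex, as
// below, is an assumption about the caller, not something Put enforces.
//
//	sum := sha256.Sum256(payload)       // crypto/sha256
//	hash := hex.EncodeToString(sum[:])  // encoding/hex
//	if err := store.Put(hash, bytes.NewReader(payload)); err != nil {
//		// handle the error
//	}
//	rc, _, err := store.Get(hash)
//	if err != nil {
//		// handle the error
//	}
//	defer rc.Close()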

// Get opens the blob for reading and returns its size if known.
func (s *FSStore) Get(hash string) (io.ReadCloser, int64, error) {
	p, err := s.findBlobPath(hash)
	if err != nil {
		return nil, 0, err
	}
	f, err := os.Open(p)
	if err != nil {
		return nil, 0, err
	}
	st, err := f.Stat()
	if err != nil {
		// Size unknown; still hand back the open reader.
		return f, 0, nil
	}
	return f, st.Size(), nil
}

// Delete removes the blob. It is not an error if it doesn't exist.
// It tries the flat path, then common nested paths, and finally falls back to
// removing whatever findBlobPath locates.
func (s *FSStore) Delete(hash string) error {
	// Try the flat path.
	if p, _ := s.pathFlat(hash); fileExists(p) {
		if err := os.Remove(p); err == nil || errors.Is(err, os.ErrNotExist) {
			return nil
		}
	}
	// Try the common nested names.
	dir := filepath.Join(s.objects, hash)
	for _, cand := range []string{"blob", "data", "content"} {
		p := filepath.Join(dir, cand)
		if fileExists(p) {
			if err := os.Remove(p); err == nil || errors.Is(err, os.ErrNotExist) {
				return nil
			}
		}
	}
	// Try the two-level prefix layout.
	if len(hash) >= 2 {
		p := filepath.Join(s.objects, hash[:2], hash)
		if fileExists(p) {
			if err := os.Remove(p); err == nil || errors.Is(err, os.ErrNotExist) {
				return nil
			}
		}
	}
	// Fallback: whatever findBlobPath locates.
	if p, err := s.findBlobPath(hash); err == nil {
		if err := os.Remove(p); err == nil || errors.Is(err, os.ErrNotExist) {
			return nil
		}
	}
	// If we couldn't find anything, treat as success (idempotent delete).
	return nil
}
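
// Because Delete is idempotent, callers don't need an existence check first.
// Sketch (hash value hypothetical):
//
//	_ = store.Delete(hash) // removes the blob wherever it lives
//	_ = store.Delete(hash) // no-op the second time, still returns nil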

// Walk calls fn(hash, size, modTime) for each blob file found.
// It recognizes blobs when either:
//   - the file name is a 64-char hex hash, or
//   - the parent directory name is that hash (folder-per-post).
//
// If multiple files map to the same hash (e.g., a directory contains several
// files), the newest file's size/modTime is reported.
func (s *FSStore) Walk(fn func(hash string, size int64, mod time.Time) error) error {
	type rec struct {
		size int64
		mod  time.Time
	}

	agg := make(map[string]rec)

	err := filepath.WalkDir(s.objects, func(p string, d fs.DirEntry, err error) error {
		if err != nil {
			return nil // skip unreadable entries
		}
		if d.IsDir() {
			return nil
		}
		// Only consider regular files.
		fi, err := os.Stat(p)
		if err != nil || !fi.Mode().IsRegular() {
			return nil
		}
		base := filepath.Base(p)

		// Case 1: the filename itself is the hash (flat or prefix-sharded layout).
		if isHexHash(base) {
			if r, ok := agg[base]; !ok || fi.ModTime().After(r.mod) {
				agg[base] = rec{size: fi.Size(), mod: fi.ModTime()}
			}
			return nil
		}

		// Case 2: the parent directory is the hash (folder-per-post layout).
		parent := filepath.Base(filepath.Dir(p))
		if isHexHash(parent) {
			if r, ok := agg[parent]; !ok || fi.ModTime().After(r.mod) {
				agg[parent] = rec{size: fi.Size(), mod: fi.ModTime()}
			}
			return nil
		}

		// Case 3: safety net for hashes that sneak in with uppercase hex
		// (Case 1 only matches lowercase); normalize to lowercase so both
		// casings aggregate under a single key.
		if lower := strings.ToLower(base); isHexHash(lower) {
			if r, ok := agg[lower]; !ok || fi.ModTime().After(r.mod) {
				agg[lower] = rec{size: fi.Size(), mod: fi.ModTime()}
			}
		}
		return nil
	})
	if err != nil {
		return err
	}

	for h, r := range agg {
		if err := fn(h, r.size, r.mod); err != nil {
			return err
		}
	}
	return nil
}
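
// Reindexing sketch (illustrative; the index map is a stand-in for whatever
// structure the caller maintains): Walk reports each recognized blob exactly
// once, so it can rebuild a hash -> size index from disk.
//
//	index := make(map[string]int64)
//	err := store.Walk(func(hash string, size int64, mod time.Time) error {
//		index[hash] = size
//		return nil
//	})
//	if err != nil {
//		// handle the error
//	}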