Updated the README

Added new security layers
This commit is contained in:
2025-08-22 12:39:51 -04:00
parent fb7428064f
commit 720c7e0b52
7 changed files with 695 additions and 552 deletions

View File

@@ -10,15 +10,11 @@ import (
"time"
)
// FSStore stores blobs on the local filesystem under root/objects/...
// It supports both a flat layout (objects/<hash>) and a nested layout
// (objects/<hash>/<file> or objects/<prefix>/<hash>).
type FSStore struct {
root string
objects string
}
// NewFS returns a file-backed blob store rooted at dir.
func NewFS(dir string) (*FSStore, error) {
if dir == "" {
return nil, errors.New("empty storage dir")
@@ -30,7 +26,6 @@ func NewFS(dir string) (*FSStore, error) {
return &FSStore{root: dir, objects: o}, nil
}
// pathFlat returns the flat path objects/<hash>.
func (s *FSStore) pathFlat(hash string) (string, error) {
if hash == "" {
return "", errors.New("empty hash")
@@ -38,7 +33,6 @@ func (s *FSStore) pathFlat(hash string) (string, error) {
return filepath.Join(s.objects, hash), nil
}
// isHexHash does a quick check for lowercase hex of length 64.
func isHexHash(name string) bool {
if len(name) != 64 {
return false
@@ -52,27 +46,14 @@ func isHexHash(name string) bool {
return true
}
// findBlobPath tries common layouts before falling back to a recursive search.
//
// Supported fast paths (in order):
// 1. objects/<hash> (flat file)
// 2. objects/<hash>/blob|data|content (common names)
// 3. objects/<hash>/<single file> (folder-per-post; pick that file)
// 4. objects/<hash[0:2]>/<hash> (two-level prefix sharding)
//
// If still not found, it walks recursively under objects/ to locate either:
// - a file named exactly <hash>, or
// - any file under a directory named <hash> (choose the most recently modified).
func (s *FSStore) findBlobPath(hash string) (string, error) {
if hash == "" {
return "", errors.New("empty hash")
}
// 1) flat file
// 1) flat
if p, _ := s.pathFlat(hash); fileExists(p) {
return p, nil
}
// 2) objects/<hash>/{blob,data,content}
dir := filepath.Join(s.objects, hash)
for _, cand := range []string{"blob", "data", "content"} {
@@ -81,88 +62,67 @@ func (s *FSStore) findBlobPath(hash string) (string, error) {
return p, nil
}
}
// 3) objects/<hash>/<single file>
if st, err := os.Stat(dir); err == nil && st.IsDir() {
ents, err := os.ReadDir(dir)
if err == nil {
var picked string
var pickedMod time.Time
for _, de := range ents {
if de.IsDir() {
continue
}
p := filepath.Join(dir, de.Name())
fi, err := os.Stat(p)
if err != nil || !fi.Mode().IsRegular() {
continue
}
// Pick newest file if multiple.
if picked == "" || fi.ModTime().After(pickedMod) {
picked = p
pickedMod = fi.ModTime()
}
ents, _ := os.ReadDir(dir)
var picked string
var pickedMod time.Time
for _, de := range ents {
if de.IsDir() {
continue
}
if picked != "" {
return picked, nil
p := filepath.Join(dir, de.Name())
fi, err := os.Stat(p)
if err == nil && fi.Mode().IsRegular() {
if picked == "" || fi.ModTime().After(pickedMod) {
picked, pickedMod = p, fi.ModTime()
}
}
}
if picked != "" {
return picked, nil
}
}
// 4) two-level prefix: objects/aa/<hash>
// 4) two-level prefix objects/aa/<hash>
if len(hash) >= 2 {
p := filepath.Join(s.objects, hash[:2], hash)
if fileExists(p) {
return p, nil
}
}
// Fallback: recursive search
// 5) recursive search
var best string
var bestMod time.Time
err := filepath.WalkDir(s.objects, func(p string, d fs.DirEntry, err error) error {
if err != nil {
// ignore per-entry errors
return nil
}
if d.IsDir() {
_ = filepath.WalkDir(s.objects, func(p string, d fs.DirEntry, err error) error {
if err != nil || d.IsDir() {
return nil
}
base := filepath.Base(p)
// Exact filename == hash
if base == hash {
best = p
// exact match is good enough; stop here
return fs.SkipDir
}
// If parent dir name is hash, consider it
parent := filepath.Base(filepath.Dir(p))
if parent == hash {
if fi, err := os.Stat(p); err == nil && fi.Mode().IsRegular() {
if best == "" || fi.ModTime().After(bestMod) {
best = p
bestMod = fi.ModTime()
best, bestMod = p, fi.ModTime()
}
}
}
return nil
})
if err == nil && best != "" {
if best != "" {
return best, nil
}
return "", os.ErrNotExist
}
// fileExists reports whether p names an existing regular file
// (directories, symlink targets that are not regular files, and
// stat errors all yield false).
func fileExists(p string) bool {
	if fi, err := os.Stat(p); err == nil {
		return fi.Mode().IsRegular()
	}
	return false
}
// Put writes/overwrites the blob at the content hash into the flat path.
// (Nested layouts remain supported for reads/reindex, but new writes are flat.)
func (s *FSStore) Put(hash string, r io.Reader) error {
p, err := s.pathFlat(hash)
if err != nil {
@@ -189,7 +149,6 @@ func (s *FSStore) Put(hash string, r io.Reader) error {
return os.Rename(tmp, p)
}
// Get opens the blob for reading and returns its size if known.
func (s *FSStore) Get(hash string) (io.ReadCloser, int64, error) {
p, err := s.findBlobPath(hash)
if err != nil {
@@ -206,17 +165,12 @@ func (s *FSStore) Get(hash string) (io.ReadCloser, int64, error) {
return f, st.Size(), nil
}
// Delete removes the blob. It is not an error if it doesn't exist.
// It tries the flat path, common nested paths, then falls back to remove
// any file found via findBlobPath.
func (s *FSStore) Delete(hash string) error {
// Try flat
if p, _ := s.pathFlat(hash); fileExists(p) {
if err := os.Remove(p); err == nil || errors.Is(err, os.ErrNotExist) {
return nil
}
}
// Try common nested
dir := filepath.Join(s.objects, hash)
for _, cand := range []string{"blob", "data", "content"} {
p := filepath.Join(dir, cand)
@@ -234,77 +188,49 @@ func (s *FSStore) Delete(hash string) error {
}
}
}
// Fallback: whatever findBlobPath locates
if p, err := s.findBlobPath(hash); err == nil {
if err := os.Remove(p); err == nil || errors.Is(err, os.ErrNotExist) {
return nil
}
}
// If we couldn't find anything, treat as success (idempotent delete)
return nil
}
// Walk calls fn(hash, size, modTime) for each blob file found.
// It recognizes blobs when either:
// - the file name is a 64-char hex hash, or
// - the parent directory name is that hash (folder-per-post).
//
// If multiple files map to the same hash (e.g., dir contains many files),
// the newest file's size/modTime is reported.
func (s *FSStore) Walk(fn func(hash string, size int64, mod time.Time) error) error {
type rec struct {
size int64
mod time.Time
}
agg := make(map[string]rec)
err := filepath.WalkDir(s.objects, func(p string, d fs.DirEntry, err error) error {
if err != nil {
return nil // skip unreadable entries
}
if d.IsDir() {
_ = filepath.WalkDir(s.objects, func(p string, d fs.DirEntry, err error) error {
if err != nil || d.IsDir() {
return nil
}
// Only consider regular files
fi, err := os.Stat(p)
if err != nil || !fi.Mode().IsRegular() {
return nil
}
base := filepath.Base(p)
// Case 1: filename equals hash
if isHexHash(base) {
if r, ok := agg[base]; !ok || fi.ModTime().After(r.mod) {
agg[base] = rec{size: fi.Size(), mod: fi.ModTime()}
agg[base] = rec{fi.Size(), fi.ModTime()}
}
return nil
}
// Case 2: parent dir is the hash
parent := filepath.Base(filepath.Dir(p))
if isHexHash(parent) {
if r, ok := agg[parent]; !ok || fi.ModTime().After(r.mod) {
agg[parent] = rec{size: fi.Size(), mod: fi.ModTime()}
agg[parent] = rec{fi.Size(), fi.ModTime()}
}
return nil
}
// Case 3: two-level prefix layout e.g. objects/aa/<hash>
// If parent is a 2-char dir and grandparent is objects/, base might be hash.
if len(base) == 64 && isHexHash(strings.ToLower(base)) {
// already handled as Case 1, but keep as safety if different casing sneaks in
if r, ok := agg[base]; !ok || fi.ModTime().After(r.mod) {
agg[base] = rec{size: fi.Size(), mod: fi.ModTime()}
agg[base] = rec{fi.Size(), fi.ModTime()}
}
return nil
}
return nil
})
if err != nil {
return err
}
for h, r := range agg {
if err := fn(h, r.size, r.mod); err != nil {
return err