import re
import json
import os
import shutil

import numpy as np
from sklearn.cluster import KMeans

from utils.unicleaner import clean_unicode

BRAINMAP_PATH = "data/memory/brainmap.json"  # actual connection data
BRAINMAP_CACHE_PATH = "data/memory/brainmap_cache.json"  # for dashboard rendering only

brainmap = {}

MAX_CONNECTIONS = 50  # Max neighbors to keep per word


def is_valid_brainword(word: str) -> bool:
    word = clean_unicode(word.strip())

    if len(word) < 3:
        return False
    if re.fullmatch(r"\d+", word):  # Pure numbers
        return False
    if re.fullmatch(r"(i|ii|iii|iv|v|vi|vii|viii|ix|x|xi|xii|xiii|xiv|xv)", word.lower()):  # Roman numerals I-XV
        return False
    if not word.isascii():
        return False
    if re.search(r"[^a-zA-Z0-9\-]", word):  # Block weird characters except dash
        return False
    return True
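
# Illustrative expectations (added for clarity, assuming clean_unicode leaves plain
# ASCII input unchanged):
#   is_valid_brainword("hello")  -> True
#   is_valid_brainword("2024")   -> False  (pure number)
#   is_valid_brainword("vii")    -> False  (roman numeral)
#   is_valid_brainword("co-op")  -> True   (dashes are allowed)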


def load_brainmap():
    global brainmap
    if os.path.exists(BRAINMAP_PATH):
        with open(BRAINMAP_PATH, "r", encoding="utf-8") as f:
            brainmap = json.load(f)


def save_brainmap():
    with open(BRAINMAP_PATH, "w", encoding="utf-8") as f:
        json.dump(brainmap, f, indent=2)


def add_to_brainmap(words):
    if isinstance(words, str):
        words = words.split()

    cleaned_words = [w.lower() for w in words if is_valid_brainword(w)]

    updated = False

    for i, word in enumerate(cleaned_words):
        if word not in brainmap:
            brainmap[word] = {}
            updated = True

        # Co-occurrence window: two words before and two words after
        neighbors = cleaned_words[max(0, i - 2):i] + cleaned_words[i + 1:i + 3]
        for neighbor in neighbors:
            if neighbor == word or not is_valid_brainword(neighbor):
                continue
            previous_count = brainmap[word].get(neighbor, 0)
            brainmap[word][neighbor] = previous_count + 1
            if previous_count == 0:
                updated = True

        # Limit neighbors to the strongest MAX_CONNECTIONS links
        if len(brainmap[word]) > MAX_CONNECTIONS:
            brainmap[word] = dict(
                sorted(brainmap[word].items(), key=lambda x: x[1], reverse=True)[:MAX_CONNECTIONS]
            )

    if updated:
        save_brainmap()
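
# Illustrative example (added, not in the original source), assuming clean_unicode
# leaves plain ASCII words unchanged: starting from an empty brainmap,
#     add_to_brainmap("alpha beta gamma delta")
# links each word to up to two neighbors on either side, so brainmap["beta"]
# becomes {"alpha": 1, "gamma": 1, "delta": 1}.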


def prune_brainmap(min_neighbors=2, min_strength=2):
    """
    Remove weakly connected or isolated words from the brainmap.

    Args:
        min_neighbors (int): Minimum neighbors required to keep a word.
        min_strength (int): Minimum strength (connection count) for neighbors.
    """
    global brainmap
    to_delete = []

    for word, neighbors in brainmap.items():
        # Clean weak neighbors
        weak_neighbors = [n for n, count in neighbors.items() if count < min_strength]
        for n in weak_neighbors:
            del neighbors[n]

        # Delete word if too few neighbors remain
        if len(neighbors) < min_neighbors:
            to_delete.append(word)

    for word in to_delete:
        del brainmap[word]

    save_brainmap()


def get_brainmap():
    return brainmap


def refresh_brainmap_cache(min_weight=2, max_nodes=300):
    """
    Generates a clustered brainmap view and writes it to:
    - data/memory/brainmap_cache.json (master copy)
    - static/brainmap.json (served to frontend)
    """
    map_data = get_brainmap()
    links = []
    seen_words = set()

    for word, connections in map_data.items():
        if not isinstance(connections, dict):
            print(f"[Brainmap] Skipping corrupted entry: {word} => {type(connections)}")
            continue
        for linked_word, weight in connections.items():
            if weight >= min_weight:
                links.append({
                    "source": word,
                    "target": linked_word,
                    "value": weight
                })
                seen_words.add(word)
                seen_words.add(linked_word)

    node_set = {link["source"] for link in links} | {link["target"] for link in links}
    nodes = sorted(node_set)
    if len(nodes) > max_nodes:
        nodes = nodes[:max_nodes]
        node_set = set(nodes)
        links = [l for l in links if l["source"] in node_set and l["target"] in node_set]

    # Build adjacency vectors so words with similar neighborhoods cluster together
    index_lookup = {word: i for i, word in enumerate(nodes)}
    word_vectors = []
    for word in nodes:
        vec = np.zeros(len(nodes), dtype=np.float32)
        connections = map_data.get(word, {})
        for other, strength in connections.items():
            if other in index_lookup:
                vec[index_lookup[other]] = strength
        word_vectors.append(vec)

    if len(word_vectors) < 2:
        print("[Brainmap] Not enough nodes to cluster.")
        return

    kmeans = KMeans(n_clusters=min(8, len(nodes)), n_init="auto")
    labels = kmeans.fit_predict(word_vectors)
    clustered_nodes = [{"id": word, "group": int(label)} for word, label in zip(nodes, labels)]

    output = {
        "nodes": clustered_nodes,
        "links": links
    }

    os.makedirs("data/memory", exist_ok=True)
    os.makedirs("static", exist_ok=True)

    cache_path = BRAINMAP_CACHE_PATH
    static_path = "static/brainmap.json"

    with open(cache_path, "w", encoding="utf-8") as f:
        json.dump(output, f, indent=2)

    shutil.copyfile(cache_path, static_path)
    # print(f"[Brainmap] Cache written to {cache_path} and copied to {static_path}")
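

if __name__ == "__main__":
    # Minimal usage sketch (illustrative addition, not part of the original module).
    # Assumes the default paths above and scikit-learn >= 1.2 (for n_init="auto").
    os.makedirs("data/memory", exist_ok=True)  # save_brainmap() expects this directory
    load_brainmap()
    add_to_brainmap("neural networks learn patterns from data and networks adapt quickly")
    prune_brainmap(min_neighbors=1, min_strength=1)
    refresh_brainmap_cache(min_weight=1)
    print(f"[Brainmap] Tracking {len(get_brainmap())} words")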