# 01_read_text.py
#
# Provenance: reconstructed from patch 68d9d00123f04429f785b20af94b7a14f533059f
# (Dani, Tue, 23 Sep 2025 12:44:22 -0400):
#   feat: add text file reader with normalization and stats preview
#
#   Adds a new script to read local text files, normalize line endings, and
#   display character statistics and previews. The script handles missing data
#   files gracefully by using a fallback sample and provides detailed output
#   including total characters, unique characters, and a 200-character preview
#   with literal newline representations.
"""
Read a local text file, normalize newlines, and preview stats.

Usage:
    python 01_read_text.py

Notes:
- Expects a UTF-8 'data.txt' in the current working directory. If it is
  missing, a small built-in fallback sample is used instead.
- Prints total chars, unique chars, and a 200-char preview with literal "\\n".
"""

from __future__ import annotations
from pathlib import Path
from typing import Optional

FALLBACK = (
    "From fairest creatures we desire increase,\n"
    "That thereby beauty's rose might never die,\n"
    "But as the riper should by time decease,\n"
    "His tender heir might bear his memory.\n"
)

# Number of characters shown by the preview printed from main().
PREVIEW_CHARS = 200


def load_text(path: Optional[Path]) -> str:
    """Load UTF-8 text from a path if provided/existing, else return fallback.

    Args:
        path: Optional path to the text file. ``None``, a missing path, or a
            path that is not a regular file (e.g. a directory) all select the
            fallback sample.

    Returns:
        The file contents or a small fallback sample.

    Raises:
        UnicodeDecodeError: If the file exists but is not valid UTF-8.

    >>> isinstance(load_text(None), str)
    True
    >>> load_text(None) == FALLBACK
    True
    """
    # is_file() (rather than exists()) keeps directories from reaching
    # read_text(), where they would raise instead of falling back.
    if path is None or not path.is_file():
        return FALLBACK
    try:
        return path.read_text(encoding="utf-8")
    except FileNotFoundError:
        # The file disappeared between the check and the read.
        return FALLBACK


def normalize_newlines(text: str) -> str:
    """Convert all newlines to '\\n'.

    Args:
        text: Raw text.

    Returns:
        Text with CRLF/CR normalized to LF ('\\n').

    >>> normalize_newlines("a\\r\\nb\\rc\\nd") == "a\\nb\\nc\\nd"
    True
    """
    # CRLF must be handled first; otherwise each pair would become two LFs.
    return text.replace("\r\n", "\n").replace("\r", "\n")


def make_preview(text: str, n_chars: int = PREVIEW_CHARS) -> str:
    """Return a first-N-chars preview with literal newlines escaped.

    Args:
        text: Input text.
        n_chars: Max characters to preview. Negative values are treated as 0
            so the slice never counts from the end of the text.

    Returns:
        Preview string (newlines shown as '\\n').

    >>> make_preview("hi\\nthere", 5)
    'hi\\\\nth'
    >>> make_preview("abc", -1)
    ''
    """
    # Truncate before escaping so n_chars counts source characters, not the
    # two-character escape sequences.
    preview = text[: max(n_chars, 0)]
    return preview.replace("\n", "\\n")


def report_stats(text: str) -> str:
    """Produce a human-readable stats report: total chars, unique chars.

    Args:
        text: Input text.

    Returns:
        A two-line report string.

    >>> "Total chars:" in report_stats("abaca")
    True
    >>> report_stats("abaca")
    'Total chars: 5\\nUnique chars: 3'
    """
    total = len(text)
    uniq = len(set(text))
    return f"Total chars: {total}\nUnique chars: {uniq}"


def main() -> None:
    """Entry point: load, normalize, and report a preview."""
    data_path = Path("data.txt")
    text = normalize_newlines(load_text(data_path))
    print(report_stats(text))
    print(f"Preview ({PREVIEW_CHARS} chars, \\n shown literally):")
    print(make_preview(text, PREVIEW_CHARS))


if __name__ == "__main__":
    main()