algorembrant commited on
Commit
453c822
·
verified ·
1 Parent(s): 50a52ca

Upload 3 files

Browse files
file-content-surgeon/core.py ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Note for AI agent/user: always run `python core.py --help` before reading this file. An LLM or AI agent is not an exception: run the command and read the output before reading this codebase.
3
+ If reading the codebase is unnecessary, then do not read this file's codebase.
4
+
5
+ """
6
+
7
+ import os
8
+ import re
9
+ import sys
10
+ import time
11
+ import argparse
12
+ import concurrent.futures
13
+ from pathlib import Path
14
+ from datetime import datetime
15
+
16
# Names (or "*suffix" patterns) of directories and files that are skipped
# while walking the repository.
DEFAULT_IGNORE = {
    ".git", "__pycache__", ".venv", "venv", "env", "node_modules",
    ".ipynb_checkpoints", ".mypy_cache", ".pytest_cache", ".tox",
    ".eggs", "*.egg-info", ".DS_Store", "Thumbs.db", "desktop.ini",
}

# Regex patterns for various code elements, grouped by language.
# NOTE(review): nothing in the visible part of this file reads PATTERNS.
PATTERNS = {
    'python': {
        'function': r'def\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\(',
        'class': r'class\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*[:\(]',
        'async': r'async\s+',
        'import': r'(import\s+|from\s+)',
        'comment': r'#.*',
    },
    'javascript': {
        'function': r'(function\s+([a-zA-Z_][a-zA-Z0-9_]*)|([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*(\(.*\)|.*)\s*=>)',
        'class': r'class\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*[\{\(]',
        'async': r'async\s+',
        'import': r'(import\s+|require\s*\()',
        'comment': r'//.*|/\*[\s\S]*?\*/',
    },
    'general': {
        'todo': r'TODO[:\s]+.*',
        'fixme': r'FIXME[:\s]+.*',
    },
}
43
+
44
class Auditor:
    """Walk a directory tree and collect per-file size, line count and search hits.

    Attributes:
        root: Absolute path of the directory being audited.
        ignore_set: Exact names or glob patterns of entries to skip.
        audit_results: One dict per readable file with the keys ``path``,
            ``size``, ``lines`` and ``matches``; sorted by path after a run.
        total_files: Number of files discovered by the last run.
        total_size: Sum of the sizes (bytes) of the files that were read.
        duration: Seconds spent scanning file contents in the last run.
    """

    def __init__(self, root_path, ignore_set=None):
        """Prepare an audit of *root_path*.

        Args:
            root_path: Directory to audit (made absolute).
            ignore_set: Names/patterns to skip. ``None`` selects
                ``DEFAULT_IGNORE``; an explicit empty set disables ignoring.
        """
        self.root = os.path.abspath(root_path)
        # Test for None rather than truthiness so that an empty set is honoured.
        self.ignore_set = DEFAULT_IGNORE if ignore_set is None else ignore_set
        self.audit_results = []
        self.total_files = 0
        self.total_size = 0
        self.duration = 0

    def should_ignore(self, path):
        """Return True if the last component of *path* matches the ignore set.

        Exact names always match; entries containing ``*`` or ``?`` are
        treated as case-sensitive shell-style patterns.
        """
        from fnmatch import fnmatchcase  # local import keeps the class self-contained

        name = os.path.basename(path)
        if name in self.ignore_set:
            return True
        return any(
            ("*" in pattern or "?" in pattern) and fnmatchcase(name, pattern)
            for pattern in self.ignore_set
        )

    def scan_file(self, file_path, search_syntax=None):
        """Read one file and return its audit record, or None if it is unreadable.

        Args:
            file_path: Path of the file to read (decoded as UTF-8, undecodable
                bytes dropped).
            search_syntax: Optional plain substring; every line containing it
                is recorded with its 1-based line number and stripped text.
        """
        matches = []
        line_count = 0
        try:
            size = os.path.getsize(file_path)
            # Stream the file so large files are never held in memory at once.
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as handle:
                for line_count, line in enumerate(handle, start=1):
                    if search_syntax and search_syntax in line:
                        matches.append({
                            'line': line_count,
                            'content': line.strip(),
                        })
        except (OSError, ValueError):
            # Best effort: unreadable entries are simply left out of the report.
            return None

        return {
            'path': os.path.abspath(file_path),
            'size': size,
            'lines': line_count,
            'matches': matches,
        }

    def run_audit(self, search_syntax=None):
        """Discover all non-ignored files under ``self.root`` and scan them.

        Results of any previous run are discarded first, so calling this
        method twice does not double-count.
        """
        self.audit_results = []
        self.total_size = 0

        file_list = []
        for root, dirs, files in os.walk(self.root):
            # Prune in place so os.walk does not descend into ignored dirs.
            dirs[:] = [d for d in dirs if not self.should_ignore(os.path.join(root, d))]
            for name in files:
                candidate = os.path.join(root, name)
                if not self.should_ignore(candidate):
                    file_list.append(candidate)
        file_list.sort()  # stable report order regardless of filesystem order

        self.total_files = len(file_list)
        start_time = time.perf_counter()

        with concurrent.futures.ThreadPoolExecutor() as executor:
            # Executor.map yields results in submission order, which keeps
            # audit_results sorted by path.
            scanned = executor.map(
                lambda path: self.scan_file(path, search_syntax), file_list
            )
            for res in scanned:
                if res is not None:
                    self.audit_results.append(res)
                    self.total_size += res['size']

        self.duration = time.perf_counter() - start_time

    def print_report(self, search_syntax=None):
        """Print the audit to stdout.

        With *search_syntax* every match is listed per file; otherwise the
        ten largest files are shown.
        """
        print(f"AUDIT REPORT - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        print(f"Target: {self.root}")
        print(f"Files scanned: {self.total_files}")
        print(f"Total size: {self.total_size / 1024:.2f} KB")
        print(f"Time taken: {self.duration:.4f} seconds")
        print("-" * 80)

        if search_syntax:
            print(f"SEARCHING FOR: '{search_syntax}'")
            match_count = 0
            for res in self.audit_results:
                if res['matches']:
                    print(f"\nFILE: {res['path']} ({res['size']} bytes)")
                    for match in res['matches']:
                        print(f"  #L{match['line']}: {match['content']}")
                        match_count += 1
            print(f"\nTotal occurrences found: {match_count}")
        else:
            # Summary mode
            largest = sorted(self.audit_results, key=lambda x: x['size'], reverse=True)[:10]
            for res in largest:
                print(f"{res['path']:<50} | {res['size']:>10} bytes | {res['lines']:>6} lines")

    def save_log(self, filename="audit_log.txt", search_syntax=None):
        """Write the audit to *filename* (UTF-8, overwritten).

        When *search_syntax* is given, files without matches are omitted.
        """
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(f"AUDIT LOG - {datetime.now()}\n")
            f.write(f"Target: {self.root}\n")
            f.write(f"Time taken: {self.duration:.4f} seconds\n\n")
            if search_syntax:
                f.write(f"Search: {search_syntax}\n\n")

            for res in self.audit_results:
                if search_syntax and not res['matches']:
                    continue
                f.write(f"FILE: {res['path']}\n")
                f.write(f"Size: {res['size']} bytes\n")
                f.write(f"Lines: {res['lines']}\n")
                for match in res['matches']:
                    f.write(f"  #L{match['line']}: {match['content']}\n")
                f.write("-" * 40 + "\n")
        print(f"Audit log saved to {filename}")

    def save_log_standard(self, search_syntax=None):
        """Save the log as ``<root directory name>_savelog.txt`` in the current directory."""
        root_name = os.path.basename(self.root) or os.path.basename(os.path.dirname(self.root))
        filename = f"{root_name}_savelog.txt"
        self.save_log(filename, search_syntax)
155
+
156
def main():
    """Command-line entry point: parse arguments, run the audit and report.

    Exits with status 1 (message on stderr) when the given path does not
    exist or is not a directory.
    """
    parser = argparse.ArgumentParser(
        description="Robust Repository Auditor - High-performance filesystem & code auditing tool.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Usage Examples:
  python core.py /path/to/repo           - Run a general audit (top 10 largest files/stats)
  python core.py . -s "TODO"             - Search for 'TODO' strings in the current directory
  python core.py . -s "class " -sl       - Search for classes and save result to reponame_savelog.txt
"""
    )
    parser.add_argument("path", help="Path to the repository to audit (e.g., '.' or 'C:\\MyRepo')")
    parser.add_argument("-s", "--search", help="Search Feature: Search for specific syntax, functions, or text across all files.")
    parser.add_argument("-sl", "--save-log", action="store_true", help="Save Log Feature: Generate and save the audit report to 'reponame_savelog.txt'.")

    args = parser.parse_args()

    if not os.path.exists(args.path):
        print(f"Error: Path '{args.path}' does not exist.", file=sys.stderr)
        sys.exit(1)
    if not os.path.isdir(args.path):
        # os.walk on a regular file yields nothing, which would produce a
        # misleading "0 files" report instead of an error.
        print(f"Error: Path '{args.path}' is not a directory.", file=sys.stderr)
        sys.exit(1)

    auditor = Auditor(args.path)
    print("Scanning filesystem...")
    auditor.run_audit(search_syntax=args.search)
    auditor.print_report(search_syntax=args.search)

    if args.save_log:
        auditor.save_log_standard(search_syntax=args.search)


if __name__ == "__main__":
    main()
file-content-surgeon/scan_structure.py ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Note for AI agent/user: always run `python scan_structure.py --help` before reading this file. An LLM or AI agent is not an exception: run the command and read the output before reading this codebase.
3
+ If reading the codebase is unnecessary, then do not read this file's codebase.
4
+ """
5
+
6
+ import os
7
+ import sys
8
+ import argparse
9
+ from pathlib import Path
10
+
11
+ # ---------------------------------------------------------------------------
12
+ # Default ignore patterns (common non-source dirs / files)
13
+ # ---------------------------------------------------------------------------
14
# Exact names, or glob patterns such as "*.egg-info", of entries that are
# left out of the generated tree unless --no-default-ignore is given.
DEFAULT_IGNORE = {
    ".git",
    "__pycache__",
    ".venv",
    "venv",
    "env",
    "node_modules",
    ".ipynb_checkpoints",
    ".mypy_cache",
    ".pytest_cache",
    ".tox",
    ".eggs",
    "*.egg-info",
    ".DS_Store",
    "Thumbs.db",
    "desktop.ini",
}
31
+
32
+
33
def should_ignore(name: str, ignore_set: set) -> bool:
    """Return True if *name* matches any entry in *ignore_set*.

    An entry matches when it equals *name* exactly, or when it contains a
    ``*`` or ``?`` wildcard and matches *name* as a case-sensitive
    shell-style pattern (so ``*.egg-info``, ``build*`` and ``tmp?`` all work).
    """
    from fnmatch import fnmatchcase  # local import keeps this block self-contained

    if name in ignore_set:
        return True
    # Only wildcard entries are handed to fnmatch, so plain names that happen
    # to contain "[" keep their literal meaning.
    return any(
        ("*" in pattern or "?" in pattern) and fnmatchcase(name, pattern)
        for pattern in ignore_set
    )
41
+
42
+
43
def build_tree(root_path: str, ignore_set: set) -> list[str]:
    """Walk *root_path* depth-first and return tree-formatted lines.

    Directories are listed before files, each group sorted by lower-cased
    name. Entries matching *ignore_set* are skipped, and symlinked
    directories are listed but not descended into.

    Args:
        root_path: Directory whose contents are rendered.
        ignore_set: Names/patterns passed to ``should_ignore``.

    Returns:
        One string per entry, prefixed with box-drawing connectors. A
        directory that cannot be listed contributes no lines.
    """
    lines: list[str] = []

    def _walk(current: str, prefix: str) -> None:
        try:
            # The context manager closes the directory handle promptly.
            with os.scandir(current) as scanner:
                # Resolve is_dir once per entry; it is needed for sorting,
                # for the "/" suffix and for the recursion decision.
                entries = [
                    (entry, entry.is_dir(follow_symlinks=False))
                    for entry in scanner
                    if not should_ignore(entry.name, ignore_set)
                ]
        except OSError:
            # Unreadable, vanished or non-directory path: skip it rather
            # than abort the whole scan.
            return

        entries.sort(key=lambda item: (not item[1], item[0].name.lower()))

        last_index = len(entries) - 1
        for idx, (entry, is_dir) in enumerate(entries):
            is_last = idx == last_index
            connector = "└── " if is_last else "├── "
            suffix = "/" if is_dir else ""
            lines.append(f"{prefix}{connector}{entry.name}{suffix}")

            if is_dir:
                extension = "    " if is_last else "│   "
                _walk(entry.path, prefix + extension)

    _walk(root_path, "")
    return lines
72
+
73
+
74
def main() -> None:
    """Command-line entry point: render the tree, print it and write it to disk.

    The Markdown output goes to ``--output`` (default ``<root>/STRUCTURE.md``);
    with ``-sl`` a plain-text copy is also written to
    ``<root>/<root name>_savelog.txt``. Exits with status 1 when the target
    is not an existing directory.
    """
    # Ensure stdout handles UTF-8 (prevents UnicodeEncodeError on Windows)
    if hasattr(sys.stdout, "reconfigure"):
        sys.stdout.reconfigure(encoding="utf-8")

    parser = argparse.ArgumentParser(
        description="Scan repository file structure and output a tree representation.",
        formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument(
        "path",
        nargs="?",
        default=".",
        help="Root directory to scan (default: current directory).",
    )
    parser.add_argument(
        "--output",
        "-o",
        default=None,
        help="Output Markdown file path (default: STRUCTURE.md).",
    )
    parser.add_argument(
        "--save-log",
        "-sl",
        action="store_true",
        help="Save Log Feature: Exports the structure to reponame_savelog.txt.",
    )
    parser.add_argument(
        "--ignore",
        nargs="*",
        default=None,
        help="Extra patterns to ignore.",
    )
    parser.add_argument(
        "--no-default-ignore",
        action="store_true",
        help="Disable the built-in ignore list.",
    )
    args = parser.parse_args()

    root = os.path.abspath(args.path)
    if not os.path.isdir(root):
        # Fail early with a clear message instead of a traceback from the
        # directory scan or from opening the output file.
        print(f"Error: '{args.path}' is not an existing directory.", file=sys.stderr)
        sys.exit(1)
    root_name = os.path.basename(root) or os.path.basename(os.path.dirname(root))
    log_filename = f"{root_name}_savelog.txt"

    ignore_set: set = set() if args.no_default_ignore else set(DEFAULT_IGNORE)
    if args.ignore:
        ignore_set.update(args.ignore)

    # Keep the tool's own output files out of the tree they describe.
    out_path = args.output or os.path.join(root, "STRUCTURE.md")
    ignore_set.add(os.path.basename(out_path))
    if args.save_log:
        ignore_set.add(log_filename)

    print(f"Scanning: {root}")
    tree_lines = build_tree(root, ignore_set)

    md_lines = [
        f"## Project Structure: {root_name}\n",
        "```text",
        f"{root_name}/",
    ]
    md_lines.extend(tree_lines)
    md_lines.append("```\n")

    content = "\n".join(md_lines)
    print("\n" + content)

    # Output to standard file
    with open(out_path, "w", encoding="utf-8") as fh:
        fh.write(content)
    print(f"Structure written to {out_path}")

    # Handle -sl save log feature
    if args.save_log:
        log_path = os.path.join(root, log_filename)
        with open(log_path, "w", encoding="utf-8") as fh:
            fh.write(f"REPOSITORY STRUCTURE AUDIT: {root_name}\n")
            fh.write(f"Path: {root}\n\n")
            fh.write(f"{root_name}/\n")
            fh.write("\n".join(tree_lines))
        print(f"Audit log saved to {log_path}")


if __name__ == "__main__":
    main()
file-content-surgeon/scan_techstack.py ADDED
@@ -0,0 +1,811 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Note for AI agent/user: always run `python scan_techstack.py --help` before reading this file. An LLM or AI agent is not an exception: run the command and read the output before reading this codebase.
3
+ If reading the codebase is unnecessary, then do not read this file's codebase.
4
+ """
5
+
6
+ import os
7
+ import re
8
+ import sys
9
+ import argparse
10
+ from pathlib import Path
11
+ from collections import defaultdict
12
+
13
+
14
+ # ---------------------------------------------------------------------------
15
+ # Comprehensive file-extension → human-readable label mapping
16
+ # Covers programming, data, document, media, archive, config, and more.
17
+ # Any extension NOT in this dict will be auto-labelled from the extension.
18
+ # ---------------------------------------------------------------------------
19
# Keys are lower-case and include the leading dot. Every key appears exactly
# once: a repeated key in a dict literal silently overrides the earlier entry,
# so extensions shared by two formats carry a combined "A / B" label.
EXT_LABELS: dict[str, str] = {
    # ── Programming Languages ──────────────────────────────────────────
    ".py": "Python",
    ".pyw": "Python (windowed)",
    ".pyx": "Cython",
    ".pxd": "Cython Definition",
    ".pyi": "Python Stub",
    ".pyc": "Python Compiled",
    ".pyo": "Python Optimized",
    ".js": "JavaScript",
    ".mjs": "ES Module JavaScript",
    ".cjs": "CommonJS JavaScript",
    ".jsx": "JSX (React)",
    ".ts": "TypeScript / MPEG-TS",
    ".tsx": "TSX (React)",
    ".java": "Java",
    ".class": "Java Class",
    ".jar": "Java Archive",
    ".kt": "Kotlin",
    ".kts": "Kotlin Script",
    ".scala": "Scala",
    ".groovy": "Groovy",
    ".c": "C",
    ".h": "C Header",
    ".cpp": "C++",
    ".cxx": "C++",
    ".cc": "C++",
    ".c++": "C++",
    ".hpp": "C++ Header",
    ".hxx": "C++ Header",
    ".hh": "C++ Header",
    ".h++": "C++ Header",
    ".cs": "C#",
    ".csx": "C# Script",
    ".fs": "F#",
    ".fsx": "F# Script",
    ".fsi": "F# Signature",
    ".vb": "Visual Basic",
    ".vbs": "VBScript",
    ".go": "Go",
    ".rs": "Rust",
    ".rb": "Ruby",
    ".erb": "Embedded Ruby",
    ".php": "PHP",
    ".pl": "Perl",
    ".pm": "Perl Module",
    ".lua": "Lua",
    ".r": "R",
    ".rmd": "R Markdown",
    ".swift": "Swift",
    ".m": "Objective-C / MATLAB",
    ".mm": "Objective-C++",
    ".d": "D",
    ".dart": "Dart",
    ".ex": "Elixir",
    ".exs": "Elixir Script",
    ".erl": "Erlang",
    ".hrl": "Erlang Header",
    ".hs": "Haskell",
    ".lhs": "Literate Haskell",
    ".ml": "OCaml",
    ".mli": "OCaml Interface",
    ".clj": "Clojure",
    ".cljs": "ClojureScript",
    ".cljc": "Clojure Common",
    ".edn": "EDN (Clojure)",
    ".lisp": "Common Lisp",
    ".cl": "Common Lisp",
    ".el": "Emacs Lisp",
    ".scm": "Scheme",
    ".rkt": "Racket",
    ".nim": "Nim",
    ".zig": "Zig",
    ".v": "V / Verilog",
    ".vhd": "VHDL / Virtual Hard Disk",
    ".vhdl": "VHDL",
    ".sv": "SystemVerilog",
    ".asm": "Assembly",
    ".s": "Assembly",
    ".f": "Fortran",
    ".f90": "Fortran 90",
    ".f95": "Fortran 95",
    ".f03": "Fortran 2003",
    ".f08": "Fortran 2008",
    ".for": "Fortran",
    ".cob": "COBOL",
    ".cbl": "COBOL",
    ".pas": "Pascal",
    ".pp": "Pascal",
    ".ada": "Ada",
    ".adb": "Ada Body",
    ".ads": "Ada Spec",
    ".tcl": "Tcl",
    ".tk": "Tcl/Tk",
    ".awk": "AWK",
    ".sed": "sed",
    ".jl": "Julia",
    ".cr": "Crystal",
    ".hack": "Hack",
    ".hx": "Haxe",
    ".hxml": "Haxe XML",
    ".purs": "PureScript",
    ".idr": "Idris",
    ".agda": "Agda",
    ".lean": "Lean",
    ".coq": "Coq",
    ".thy": "Isabelle",
    ".pro": "Prolog",
    ".sml": "Standard ML",
    ".fig": "MATLAB Figure / Figma",
    ".mat": "MATLAB Data",

    # ── Shell / Scripting ──────────────────────────────────────────────
    ".sh": "Shell Script",
    ".bash": "Bash Script",
    ".zsh": "Zsh Script",
    ".fish": "Fish Script",
    ".bat": "Batch File",
    ".cmd": "Windows Command",
    ".ps1": "PowerShell",
    ".psm1": "PowerShell Module",
    ".psd1": "PowerShell Data",

    # ── Web / Markup / Templating ──────────────────────────────────────
    ".html": "HTML",
    ".htm": "HTML",
    ".xhtml": "XHTML",
    ".css": "CSS",
    ".scss": "SCSS",
    ".sass": "Sass",
    ".less": "Less",
    ".styl": "Stylus",
    ".vue": "Vue",
    ".svelte": "Svelte",
    ".astro": "Astro",
    ".ejs": "EJS Template",
    ".pug": "Pug Template",
    ".jade": "Jade Template",
    ".hbs": "Handlebars",
    ".mustache": "Mustache",
    ".twig": "Twig Template",
    ".jinja": "Jinja Template",
    ".jinja2": "Jinja2 Template",
    ".j2": "Jinja2 Template",
    ".njk": "Nunjucks Template",
    ".liquid": "Liquid Template",
    ".haml": "HAML",
    ".slim": "Slim Template",
    ".blade.php": "Blade Template",
    ".jsp": "JavaServer Pages",
    ".asp": "ASP",
    ".aspx": "ASP.NET",
    ".cshtml": "Razor (C#)",
    ".razor": "Razor",
    ".wasm": "WebAssembly",

    # ── Data / Config / Serialisation ──────────────────────────────────
    ".json": "JSON",
    ".jsonl": "JSON Lines",
    ".json5": "JSON5",
    ".geojson": "GeoJSON",
    ".xml": "XML",
    ".xsl": "XSLT",
    ".xslt": "XSLT",
    ".xsd": "XML Schema",
    ".dtd": "Document Type Definition",
    ".yaml": "YAML",
    ".yml": "YAML",
    ".toml": "TOML",
    ".ini": "INI Config",
    ".cfg": "Config",
    ".conf": "Config",
    ".env": "Environment File",
    ".properties": "Properties File",
    ".csv": "CSV",
    ".tsv": "TSV",
    ".parquet": "Apache Parquet",
    ".avro": "Apache Avro",
    ".orc": "Apache ORC",
    ".feather": "Feather (Arrow)",
    ".arrow": "Apache Arrow",
    ".msgpack": "MessagePack",
    ".bson": "BSON",
    ".cbor": "CBOR",
    ".protobuf": "Protocol Buffer",
    ".proto": "Protocol Buffer Schema",
    ".thrift": "Apache Thrift",
    ".hcl": "HCL (Terraform)",
    ".tf": "Terraform",
    ".tfvars": "Terraform Variables",
    ".pkl": "Pickle",
    ".pickle": "Pickle",
    ".npy": "NumPy Array",
    ".npz": "NumPy Archive",
    ".h5": "HDF5",
    ".hdf5": "HDF5",
    ".nc": "NetCDF",
    ".sqlite": "SQLite Database",
    ".sqlite3": "SQLite Database",
    ".db": "Database",
    ".dbf": "dBASE",
    ".mdb": "Access Database",
    ".accdb": "Access Database",
    ".lmdb": "LMDB",
    ".leveldb": "LevelDB",

    # ── Documents & Office ─────────────────────────────────────────────
    ".md": "Markdown",
    ".mdx": "MDX",
    ".txt": "Plain Text",
    ".text": "Plain Text",
    ".log": "Log File",
    ".rst": "reStructuredText",
    ".adoc": "AsciiDoc",
    ".org": "Org Mode",
    ".tex": "LaTeX",
    ".latex": "LaTeX",
    ".bib": "BibTeX",
    ".rtf": "Rich Text Format",
    ".doc": "Microsoft Word",
    ".docx": "Microsoft Word (OOXML)",
    ".odt": "OpenDocument Text",
    ".pdf": "PDF",
    ".xls": "Microsoft Excel",
    ".xlsx": "Microsoft Excel (OOXML)",
    ".xlsm": "Excel Macro-Enabled",
    ".xlsb": "Excel Binary",
    ".ods": "OpenDocument Spreadsheet",
    ".ppt": "Microsoft PowerPoint",
    ".pptx": "Microsoft PowerPoint (OOXML)",
    ".odp": "OpenDocument Presentation",
    ".pages": "Apple Pages",
    ".numbers": "Apple Numbers",
    ".key": "Apple Keynote",
    ".epub": "EPUB",
    ".mobi": "Mobipocket",
    ".djvu": "DjVu",
    ".chm": "Compiled HTML Help",
    ".man": "Man Page",
    ".info": "GNU Info",

    # ── Notebooks & Data Science ───────────────────────────────────────
    ".ipynb": "Jupyter Notebook",
    # ".rmd" (R Markdown) is listed under Programming Languages.
    ".qmd": "Quarto Document",
    ".dvc": "DVC File",
    ".onnx": "ONNX Model",
    ".pt": "PyTorch Model",
    ".pth": "PyTorch Checkpoint",
    ".safetensors": "SafeTensors Model",
    ".gguf": "GGUF Model",
    ".bin": "Binary File",

    # ── Images ─────────────────────────────────────────────────────────
    ".png": "PNG Image",
    ".jpg": "JPEG Image",
    ".jpeg": "JPEG Image",
    ".gif": "GIF Image",
    ".bmp": "Bitmap Image",
    ".tiff": "TIFF Image",
    ".tif": "TIFF Image",
    ".webp": "WebP Image",
    ".avif": "AVIF Image",
    ".heic": "HEIC Image",
    ".heif": "HEIF Image",
    ".svg": "SVG Image",
    ".ico": "Icon",
    ".icns": "Apple Icon",
    ".cur": "Cursor",
    ".psd": "Photoshop",
    ".ai": "Adobe Illustrator",
    ".sketch": "Sketch",
    # ".fig" (Figma) shares its key with MATLAB Figure; see Programming Languages.
    ".xd": "Adobe XD",
    ".eps": "EPS Vector",
    ".raw": "RAW Image",
    ".cr2": "Canon RAW",
    ".nef": "Nikon RAW",
    ".arw": "Sony RAW",
    ".dng": "DNG RAW",
    ".exr": "OpenEXR",
    ".hdr": "HDR Image",
    ".pbm": "PBM Image",
    ".pgm": "PGM Image",
    ".ppm": "PPM Image",
    ".pcx": "PCX Image",
    ".tga": "TGA Image",

    # ── Audio ──────────────────────────────────────────────────────────
    ".mp3": "MP3 Audio",
    ".wav": "WAV Audio",
    ".flac": "FLAC Audio",
    ".aac": "AAC Audio",
    ".ogg": "Ogg Audio",
    ".wma": "WMA Audio",
    ".m4a": "M4A Audio",
    ".opus": "Opus Audio",
    ".aiff": "AIFF Audio",
    ".mid": "MIDI",
    ".midi": "MIDI",
    ".amr": "AMR Audio",
    ".ape": "APE Audio",
    ".alac": "ALAC Audio",

    # ── Video ──────────────────────────────────────────────────────────
    ".mp4": "MP4 Video",
    ".mkv": "MKV Video",
    ".avi": "AVI Video",
    ".mov": "QuickTime Video",
    ".wmv": "WMV Video",
    ".flv": "Flash Video",
    ".webm": "WebM Video",
    ".m4v": "M4V Video",
    ".mpg": "MPEG Video",
    ".mpeg": "MPEG Video",
    ".3gp": "3GP Video",
    ".ogv": "Ogg Video",
    # ".ts" (MPEG-TS) shares its key with TypeScript; see Programming Languages.
    ".vob": "DVD Video",
    ".rm": "RealMedia",

    # ── Fonts ──────────────────────────────────────────────────────────
    ".ttf": "TrueType Font",
    ".otf": "OpenType Font",
    ".woff": "WOFF Font",
    ".woff2": "WOFF2 Font",
    ".eot": "EOT Font",
    ".fon": "Bitmap Font",

    # ── Archives / Compression ─────────────────────────────────────────
    ".zip": "ZIP Archive",
    ".gz": "Gzip Archive",
    ".gzip": "Gzip Archive",
    ".bz2": "Bzip2 Archive",
    ".xz": "XZ Archive",
    ".lz": "Lzip Archive",
    ".lzma": "LZMA Archive",
    ".zst": "Zstandard Archive",
    ".tar": "Tar Archive",
    ".tgz": "Tar+Gzip Archive",
    ".tbz2": "Tar+Bzip2 Archive",
    ".txz": "Tar+XZ Archive",
    ".rar": "RAR Archive",
    ".7z": "7-Zip Archive",
    ".cab": "Cabinet Archive",
    ".iso": "ISO Disk Image",
    ".dmg": "macOS Disk Image",
    ".img": "Disk Image",
    # ".vhd" (Virtual Hard Disk) shares its key with VHDL; see Programming Languages.
    ".vhdx": "Virtual Hard Disk (VHDX)",
    ".vmdk": "VMware Disk",
    ".qcow2": "QEMU Disk",
    ".deb": "Debian Package",
    ".rpm": "RPM Package",
    ".apk": "Android Package",
    ".ipa": "iOS App",
    ".snap": "Snap Package",
    ".flatpak": "Flatpak Package",
    ".appimage": "AppImage",
    ".msi": "MSI Installer",
    ".exe": "Windows Executable",
    ".dll": "DLL Library",
    ".so": "Shared Object",
    ".dylib": "macOS Dynamic Library",
    ".a": "Static Library",
    ".lib": "Library",
    ".o": "Object File",
    ".obj": "Object File / Wavefront OBJ",
    ".sys": "System File",
    ".drv": "Driver",

    # ── DevOps / CI / Containerisation ─────────────────────────────────
    ".dockerfile": "Dockerfile",
    ".vagrantfile": "Vagrantfile",
    ".jenkinsfile": "Jenkinsfile",

    # ── Trading / Domain-specific ──────────────────────────────────────
    ".mq5": "MQL5 Source",
    ".mq4": "MQL4 Source",
    ".mqh": "MQL Header",
    ".ex5": "MQL5 Compiled",
    ".ex4": "MQL4 Compiled",
    ".set": "MT Parameter Set",
    ".pine": "Pine Script",
    ".tpl": "MT Template",
    ".chr": "MT Chart",
    ".mql": "MQL Source",

    # ── Misc / Build / Project ─────────────────────────────────────────
    ".makefile": "Makefile",
    ".cmake": "CMake",
    ".gradle": "Gradle",
    ".sbt": "SBT Build",
    ".gemspec": "Ruby Gem Spec",
    ".podspec": "CocoaPods Spec",
    ".cabal": "Cabal (Haskell)",
    ".csproj": "C# Project",
    ".fsproj": "F# Project",
    ".vbproj": "VB.NET Project",
    ".sln": "Visual Studio Solution",
    ".xcodeproj": "Xcode Project",
    ".pbxproj": "Xcode Project (PBX)",
    ".xcworkspace": "Xcode Workspace",
    ".plist": "Property List",
    ".lnk": "Windows Shortcut",
    ".url": "URL Shortcut",
    ".desktop": "Desktop Entry",
    ".reg": "Windows Registry",
    ".service": "Systemd Service",
    ".timer": "Systemd Timer",
    ".socket": "Systemd Socket",
    ".lock": "Lock File",
    ".patch": "Patch File",
    ".diff": "Diff File",
    ".map": "Source Map",
    ".whl": "Python Wheel",
    ".egg": "Python Egg",
    ".gem": "Ruby Gem",
    ".nupkg": "NuGet Package",
    ".crate": "Rust Crate",

    # ── SQL / Database ─────────────────────────────────────────────────
    ".sql": "SQL",
    ".mysql": "MySQL",
    ".pgsql": "PostgreSQL",
    ".plsql": "PL/SQL",

    # ── GraphQL / API ──────────────────────────────────────────────────
    ".graphql": "GraphQL",
    ".gql": "GraphQL",
    ".swagger": "Swagger",
    ".openapi": "OpenAPI Spec",

    # ── 3D / CAD / GIS ─────────────────────────────────────────────────
    ".stl": "STL 3D Model",
    # ".obj" (Wavefront OBJ) shares its key with Object File; see Archives.
    ".fbx": "FBX 3D Model",
    ".gltf": "glTF 3D Model",
    ".glb": "glTF Binary",
    ".blend": "Blender File",
    ".3ds": "3DS Model",
    ".dae": "Collada",
    ".usd": "USD Scene",
    ".usda": "USD ASCII",
    ".usdc": "USD Binary",
    ".step": "STEP CAD",
    ".stp": "STEP CAD",
    ".iges": "IGES CAD",
    ".igs": "IGES CAD",
    ".dwg": "AutoCAD Drawing",
    ".dxf": "AutoCAD Exchange",
    ".shp": "Shapefile",
    ".kml": "KML",
    ".kmz": "KMZ",
    ".gpx": "GPX Track",

    # ── Certificates / Security ────────────────────────────────────────
    ".pem": "PEM Certificate",
    ".crt": "Certificate",
    ".cer": "Certificate",
    ".csr": "Certificate Request",
    ".p12": "PKCS#12",
    ".pfx": "PKCS#12",
    ".asc": "PGP/GPG",
    ".gpg": "GPG Encrypted",
    ".sig": "Signature",
    ".pub": "Public Key",

    # ── Git-related ────────────────────────────────────────────────────
    ".gitignore": "Git Ignore",
    ".gitattributes": "Git Attributes",
    ".gitmodules": "Git Submodules",

    # ── Editor / IDE Config ────────────────────────────────────────────
    ".editorconfig": "EditorConfig",
    ".prettierrc": "Prettier Config",
    ".eslintrc": "ESLint Config",
    ".stylelintrc": "Stylelint Config",
    ".babelrc": "Babel Config",
    ".swcrc": "SWC Config",
    ".nvmrc": "NVM Config",
    ".npmrc": "NPM Config",
    ".yarnrc": "Yarn Config",

    # ── Misc ───────────────────────────────────────────────────────────
    ".mw5": "MQL5 Variant",
    ".webmanifest": "Web App Manifest",
}
507
+
508
+
509
+ # ---------------------------------------------------------------------------
510
+ # Syntax Detection Patterns
511
+ # ---------------------------------------------------------------------------
512
+ SYNTAX_PATTERNS = {
513
+ "function": re.compile(r"\b(def|function)\b\s+([a-zA-Z_][a-zA-Z0-9_]*)"),
514
+ "class": re.compile(r"\b(class)\b\s+([a-zA-Z_][a-zA-Z0-9_]*)"),
515
+ "variable": re.compile(r"\b(var|let|const)\b\s+([a-zA-Z_][a-zA-Z0-9_]*)|([a-zA-Z_][a-zA-Z0-9_]*)\s*=[^=]"),
516
+ }
517
+
518
+
519
+ # ---------------------------------------------------------------------------
520
+ # ---------------------------------------------------------------------------
521
+ DEFAULT_IGNORE = {
522
+ ".git",
523
+ "__pycache__",
524
+ ".venv",
525
+ "venv",
526
+ "env",
527
+ "node_modules",
528
+ ".ipynb_checkpoints",
529
+ ".mypy_cache",
530
+ ".pytest_cache",
531
+ ".tox",
532
+ ".eggs",
533
+ "*.egg-info",
534
+ ".DS_Store",
535
+ "Thumbs.db",
536
+ "desktop.ini",
537
+ }
538
+
539
+
540
def should_ignore(name: str, ignore_set: set) -> bool:
    """Return True if *name* matches any entry in *ignore_set*.

    An entry matches when it equals *name* exactly, or when it contains a
    ``*`` or ``?`` wildcard and matches *name* as a case-sensitive
    shell-style pattern (so ``*.egg-info``, ``build*`` and ``tmp?`` all work).
    """
    from fnmatch import fnmatchcase  # local import keeps this block self-contained

    if name in ignore_set:
        return True
    # Only wildcard entries are handed to fnmatch, so plain names that happen
    # to contain "[" keep their literal meaning.
    return any(
        ("*" in pattern or "?" in pattern) and fnmatchcase(name, pattern)
        for pattern in ignore_set
    )
548
+
549
+
550
def get_label(ext: str) -> str:
    """Map an extension such as ".py" to its display label.

    Extensions missing from ``EXT_LABELS`` fall back to the upper-cased
    extension with its leading dot(s) removed (".xyz" -> "XYZ").
    """
    try:
        return EXT_LABELS[ext]
    except KeyError:
        # Unknown extension: derive a label from the extension text itself.
        return ext.upper().lstrip(".")
556
+
557
+
558
def scan_files(root_path: str, ignore_set: set, exclude_exts: set) -> dict:
    """Walk *root_path* and collect per-extension statistics.

    Args:
        root_path: Directory to walk recursively.
        ignore_set: Names/patterns of directories and files to skip
            (see ``should_ignore``).
        exclude_exts: Lower-case extensions without the leading dot
            (e.g. ``{"pyc", "log"}``) whose files are skipped.

    Returns:
        A dict keyed by normalised extension string, e.g. ".py", with values
        ``{"count": int, "size_bytes": int, "lines": int, "syntax": int,
        "label": str}``. Files without an extension are grouped under the
        key "(none)" with the label "(no extension)". Dotfiles registered in
        ``EXT_LABELS`` (".gitignore", ".npmrc", ...) are keyed by their full
        lower-cased name.
    """
    max_text_bytes = 5 * 1024 * 1024  # larger files are counted but not read

    stats: dict = defaultdict(
        lambda: {"count": 0, "size_bytes": 0, "lines": 0, "syntax": 0, "label": ""}
    )

    for dirpath, dirnames, filenames in os.walk(root_path):
        # Prune ignored directories in-place so os.walk skips them
        dirnames[:] = [d for d in dirnames if not should_ignore(d, ignore_set)]

        for fname in filenames:
            if should_ignore(fname, ignore_set):
                continue

            filepath = os.path.join(dirpath, fname)

            # Determine extension (normalised to lower case)
            ext = os.path.splitext(fname)[1].lower()
            if not ext and fname.startswith("."):
                # os.path.splitext reports no extension for names such as
                # ".gitignore"; use the whole name when it is registered so
                # the dotfile entries of EXT_LABELS are actually reachable.
                dotfile = fname.lower()
                if dotfile in EXT_LABELS:
                    ext = dotfile

            if ext and ext.lstrip(".") in exclude_exts:
                continue

            # Label
            if ext:
                label = f"{get_label(ext)} ({ext})"
            else:
                ext = "(none)"
                label = "(no extension)"

            try:
                size = os.path.getsize(filepath)
            except OSError:
                size = 0

            # Line and syntax counting.
            lines = 0
            syntax_hits = 0
            # Only scan files < 5MB to preserve performance
            if size < max_text_bytes:
                try:
                    with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
                        file_content = f.read()
                except (OSError, ValueError):
                    # Best effort: an unreadable file still counts towards
                    # "count" and "size_bytes", just not lines/syntax.
                    file_content = ""
                if file_content:
                    # A trailing newline terminates the last line rather than
                    # starting a new one, and an empty file has zero lines.
                    lines = file_content.count("\n")
                    if not file_content.endswith("\n"):
                        lines += 1
                    # Count syntax matches
                    for pattern in SYNTAX_PATTERNS.values():
                        syntax_hits += len(pattern.findall(file_content))

            entry = stats[ext]
            entry["count"] += 1
            entry["size_bytes"] += size
            entry["lines"] += lines
            entry["syntax"] += syntax_hits
            entry["label"] = label

    return dict(stats)
623
+
624
+
625
def format_size_kb(size_bytes: int) -> str:
    """Render a byte count as kilobytes (1 KB = 1024 bytes).

    A size of exactly zero is rendered as ``"0"``; every other value is
    shown with one decimal place and thousands separators, e.g. ``"1,234.5"``.
    """
    kilobytes = size_bytes / 1024
    return f"{kilobytes:,.1f}" if kilobytes else "0"
633
+
634
+
635
def build_table(stats: dict, sort_by: str) -> list[str]:
    """
    Render *stats* as the lines of a Markdown table.

    *sort_by* selects the row order: "count" (largest count first), "size"
    (largest byte size first), anything else sorts by label. Ties in the
    first two modes are broken by case-insensitive label. The final line is
    a bold totals row.
    """

    entries = list(stats.values())

    def by_label(entry):
        return entry["label"].lower()

    # Pick the ordering
    if sort_by == "count":
        ordered = sorted(entries, key=lambda e: (-e["count"], by_label(e)))
    elif sort_by == "size":
        ordered = sorted(entries, key=lambda e: (-e["size_bytes"], by_label(e)))
    else:  # "ext"
        ordered = sorted(entries, key=by_label)

    # Header and alignment rows
    table = [
        "| File Type | Count | Lines | Syntax Hits | Size (KB) |",
        "| :--- | :--- | :--- | :--- | :--- |",
    ]

    # One row per file type
    table.extend(
        f"| {e['label']} | {e['count']} | {e['lines']} | {e['syntax']} | {format_size_kb(e['size_bytes'])} |"
        for e in ordered
    )

    # Totals row
    totals = {
        field: sum(e[field] for e in entries)
        for field in ("count", "size_bytes", "lines", "syntax")
    }
    table.append(
        f"| **Total** | **{totals['count']}** | **{totals['lines']}** | **{totals['syntax']}** | **{format_size_kb(totals['size_bytes'])}** |"
    )

    return table
687
+
688
+
689
def _write_save_log(log_path: str, root_name: str, root: str, stats: dict) -> None:
    """Write the fixed-width plain-text audit log for *stats* to *log_path*."""
    # Keep the newline out of the header so the dashed rules match its width.
    header = f"{'FILE TYPE':<30} {'COUNT':>8} {'LINES':>10} {'SYNTAX':>10} {'SIZE (KB)':>15}"
    rule = "-" * len(header)

    out = [
        f"REPOSITORY TECH-STACK AUDIT: {root_name}\n",
        f"Path: {root}\n\n",
        header + "\n",
        rule + "\n",
    ]

    total_count = 0
    total_bytes = 0
    total_lines = 0
    total_syntax = 0
    for data in stats.values():
        total_count += data["count"]
        total_bytes += data["size_bytes"]
        total_lines += data["lines"]
        total_syntax += data["syntax"]
        out.append(
            f"{data['label']:<30} {data['count']:>8} {data['lines']:>10} {data['syntax']:>10} {format_size_kb(data['size_bytes']):>15}\n"
        )

    out.append(rule + "\n")
    out.append(
        f"{'TOTAL':<30} {total_count:>8} {total_lines:>10} {total_syntax:>10} {format_size_kb(total_bytes):>15}\n"
    )

    with open(log_path, "w", encoding="utf-8") as fh:
        fh.writelines(out)


def main() -> None:
    """Command-line entry point for the tech-stack audit.

    Parses the arguments, scans the chosen directory, prints the Markdown
    table, writes it to the output file (``TECHSTACK.md`` in the scanned
    directory unless ``--output`` is given) and, with ``--save-log``, also
    writes ``<dirname>_savelog.txt`` into the scanned directory.
    """
    # Ensure stdout handles UTF-8 (prevents UnicodeEncodeError on Windows)
    if hasattr(sys.stdout, "reconfigure"):
        sys.stdout.reconfigure(encoding="utf-8")

    parser = argparse.ArgumentParser(
        description="Audit repository tech-stack (file types, counts, sizes)."
    )
    parser.add_argument(
        "path",
        nargs="?",
        default=".",
        help="Root directory to scan (default: current directory).",
    )
    parser.add_argument(
        "--output",
        "-o",
        default=None,
        help="Output Markdown file path (default: TECHSTACK.md in scanned dir).",
    )
    parser.add_argument(
        "--ignore",
        nargs="*",
        default=None,
        help="Extra directory/file names to ignore.",
    )
    parser.add_argument(
        "--no-default-ignore",
        action="store_true",
        help="Disable the built-in ignore list.",
    )
    parser.add_argument(
        "--exclude-ext",
        nargs="*",
        default=None,
        help="File extensions to exclude from output (without dot, e.g. png mp4 git).",
    )
    parser.add_argument(
        "--sort",
        choices=["count", "size", "ext"],
        default="count",
        help="Sort order for the table (default: count descending).",
    )
    parser.add_argument(
        "--save-log",
        "-sl",
        action="store_true",
        help="Save Log Feature: Exports the audit results to reponame_savelog.txt.",
    )
    args = parser.parse_args()

    root = os.path.abspath(args.path)
    root_name = os.path.basename(root)

    # Build ignore set
    ignore_set: set = set() if args.no_default_ignore else set(DEFAULT_IGNORE)
    if args.ignore:
        ignore_set.update(args.ignore)

    # Always ignore the output file itself
    out_path = args.output or os.path.join(root, "TECHSTACK.md")
    ignore_set.add(os.path.basename(out_path))

    # Likewise ignore the save-log this run will (re)write, so a log left by
    # an earlier run is not counted as a project file.
    log_path = None
    if args.save_log:
        log_filename = f"{root_name}_savelog.txt"
        log_path = os.path.join(root, log_filename)
        ignore_set.add(log_filename)

    # Excluded extensions
    exclude_exts: set = set()
    if args.exclude_ext:
        exclude_exts = {e.lower().lstrip(".") for e in args.exclude_ext}

    print(f"Scanning: {root}")
    print(f"Ignoring: {', '.join(sorted(ignore_set))}")
    if exclude_exts:
        print(f"Excluding extensions: {', '.join(sorted(exclude_exts))}")

    stats = scan_files(root, ignore_set, exclude_exts)
    table_lines = build_table(stats, sort_by=args.sort)

    # Compose Markdown
    md_lines = [
        "## Techstack\n",
        f"Audit of **{root_name}** project files (excluding environment and cache):\n",
    ]
    md_lines.extend(table_lines)
    md_lines.append("")  # trailing newline

    content = "\n".join(md_lines)
    print("\n" + content)

    with open(out_path, "w", encoding="utf-8") as fh:
        fh.write(content)
    print(f"Tech-stack audit written to {out_path}")

    # Handle -sl save log feature
    if log_path is not None:
        _write_save_log(log_path, root_name, root, stats)
        print(f"Audit log saved to {log_path}")
808
+
809
+
810
# Run the audit only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()