Git-aware token count for a repo
“Git-aware” means that this script ignores files that match a .gitignore
pattern.
# Setup:
#
# python3 -m venv venv # make sure that this `venv` dir is also in your .gitignore!
# . venv/bin/activate   (fish shell: . venv/bin/activate.fish)
# python3 -m pip install google-genai
# python3 tokens.py
from json import dump, load
from os import environ, walk
from pathlib import Path
from subprocess import run
from typing import Dict
from google.genai import Client
# Module-level Gemini API client used for token counting. The key is read with
# ``environ[...]``, so a missing ``GEMINI_API_KEY`` raises ``KeyError`` as soon
# as this module is loaded.
gemini = Client(api_key=environ["GEMINI_API_KEY"])
def _is_ignored(root: Path, target: Path, ignored: list[Path]) -> bool:
    """Check whether Git ignores ``target``.

    Args:
        root: Directory handed to ``git -C``, i.e. where Git is run from.
        target: File or directory path to test.
        ignored: Paths already found to be ignored. Not used by this check;
            kept so the signature stays compatible with existing callers.

    Returns:
        ``True`` if ``target`` is a ``.git`` entry or ``git check-ignore``
        reports it as ignored, ``False`` otherwise (including when Git exits
        with an error, e.g. outside a repository).
    """
    target = Path(target)

    # Git's own metadata entry never appears in ``.gitignore``, so exclude it
    # by name. Compare the final path component only: a suffix test on the
    # whole string would also swallow unrelated names such as ``notes.git``.
    if target.name.lower() == ".git":
        return True

    # ``git -C root`` changes directory before resolving path arguments, so a
    # relative ``target`` would be looked up relative to ``root`` instead of
    # the current directory. Passing an absolute path avoids that.
    command = ["git", "-C", str(root), "check-ignore", str(target.absolute())]
    result = run(command, capture_output=True, text=True)

    # Exit status 0 means the path is ignored, 1 means it is not, and 128
    # signals a fatal error. Relying on the status instead of searching stdout
    # keeps the check correct when Git quotes or escapes the echoed path.
    return result.returncode == 0
def _is_in_ignored_dir(target: Path, ignored: list[Path]) -> bool:
    """Check whether ``target`` is, or lies beneath, an ignored path.

    The comparison works on whole path components rather than raw substrings,
    so an ignored ``venv`` matches ``venv`` and ``venv/lib`` but not ``venv2``
    or ``src/venv_tools``.

    Args:
        target: Path to test.
        ignored: Paths already known to be ignored.

    Returns:
        ``True`` if any entry of ``ignored`` equals ``target`` or is one of
        its ancestors, ``False`` otherwise (always ``False`` for an empty
        list).
    """
    target = Path(target)
    return any(target.is_relative_to(candidate) for candidate in ignored)
def collect(root: Path) -> tuple[list[Path], int]:
    """Walk ``root`` and count Gemini tokens for every file Git does not ignore.

    Ignored directories are pruned from the walk, so nothing beneath them is
    visited. Files that cannot be decoded as UTF-8 text stay in the returned
    path list but contribute no tokens.

    Args:
        root: Directory to walk; also passed to the ignore check as the
            location Git is run from.

    Returns:
        A ``(paths, tokens)`` tuple: every non-ignored file that was found,
        and the sum of the token counts reported for the files that could be
        read as text.
    """
    root = Path(root)
    paths: list[Path] = []
    ignored: list[Path] = []
    tokens = 0
    for current_working_dir, subdirs, files in walk(root):
        cwd = Path(current_working_dir)
        if _is_ignored(root, cwd, ignored):
            print(f"ignoring dir: {str(cwd)}")
            ignored.append(cwd)
            # Emptying the list in place stops ``walk`` from descending any
            # further, so the contents of an ignored directory are skipped
            # without a per-subdirectory ancestor check.
            subdirs[:] = []
            continue
        for file in files:
            path = cwd / file
            if _is_ignored(root, path, ignored):
                print(f"ignoring file: {str(path)}")
                ignored.append(path)
                continue
            paths.append(path)
            try:
                # Decode explicitly as UTF-8 so the result does not depend on
                # the platform's default encoding.
                contents = path.read_text(encoding="utf-8")
            except UnicodeDecodeError:
                print(f"skipping non-text file: {str(path)}")
                continue
            print(f"counting tokens: {str(path)}")
            response = gemini.models.count_tokens(
                model="gemini-2.5-flash", contents=contents
            )
            # Guard against a response that carries no count.
            tokens += response.total_tokens or 0
    return (paths, tokens)
def main():
    """Count tokens for the repository in the current directory and print a summary."""
    collected, total = collect(Path("."))
    summary = (
        "*" * 80,
        f"file count: {len(collected)}",
        f"tokens: {total}",
    )
    for line in summary:
        print(line)


# Run the count only when this file is executed as a script.
if __name__ == "__main__":
    main()