Git-aware token count for a repo

“Git-aware” means that this script ignores files that match a .gitignore pattern.

# Setup:
#
# python3 -m venv venv  # make sure that this `venv` dir is also in your .gitignore!
# . venv/bin/activate  # fish shell: . venv/bin/activate.fish
# python3 -m pip install google-genai
# Set the GEMINI_API_KEY environment variable (the script reads the API key from it).
# python3 tokens.py

from json import dump, load
from os import environ, walk
from pathlib import Path
from subprocess import run
from typing import Dict

from google.genai import Client


# Module-level Gemini client used by ``collect`` to count tokens.
# It is built at import time from the ``GEMINI_API_KEY`` environment variable;
# the ``environ[...]`` lookup raises ``KeyError`` if that variable is not set.
gemini = Client(api_key=environ["GEMINI_API_KEY"])


def _is_ignored(root: Path, target: Path, ignored: list[Path]) -> bool:
    """Check if Git is ignoring the path.

    Args:
        root: Directory that ``git`` is run in (passed to ``git -C``).
        target: Path to test. Paths produced by walking ``root`` carry
            ``root`` as a prefix; that prefix is stripped before asking Git.
        ignored: Unused; kept so existing callers keep working.

    Returns:
        ``True`` if ``target`` is Git's own ``.git`` entry or if
        ``git check-ignore`` exits with status 0 for it, ``False`` otherwise
        (including when Git reports an error).
    """
    # Ignore Git's directory itself. Compare only the final path component so
    # that names that merely end in ".git" (e.g. ``notes.git``) still go
    # through the real ignore check below.
    if target.name.lower() == ".git":
        return True
    # ``git -C root`` resolves relative paths against ``root``, so the path
    # handed to Git must be relative to ``root`` rather than to the caller's
    # working directory.
    try:
        pathspec = target.relative_to(root)
    except ValueError:
        # ``target`` is not spelled underneath ``root``; pass it through as-is.
        pathspec = target
    # Rely on the exit status instead of parsing stdout: Git may quote the
    # path in its output, which would defeat a plain substring comparison.
    command = ["git", "-C", str(root), "check-ignore", "-q", "--", str(pathspec)]
    result = run(command, capture_output=True, text=True)
    return result.returncode == 0


def _is_in_ignored_dir(target: Path, ignored: list[Path]) -> bool:
    """Check if this path is in an ignored directory.

    Args:
        target: Path to test.
        ignored: Paths already known to be ignored.

    Returns:
        ``True`` if ``target`` equals one of the ``ignored`` paths or lies
        underneath one of them, ``False`` otherwise (including when
        ``ignored`` is empty).
    """
    # Compare whole path components instead of substrings of the stringified
    # paths: an ignored ``build`` must not match ``rebuild/x.py``.
    lineage = {target, *target.parents}
    return any(entry in lineage for entry in ignored)


def collect(root: Path) -> tuple[list[Path], int]:
    """Collect all paths in the repository and count their tokens.

    Walks ``root``, skips every directory and file that ``_is_ignored``
    reports as ignored, and sends the text of each remaining file to
    ``gemini.models.count_tokens`` (model ``gemini-2.5-flash``).

    Args:
        root: Directory to walk.

    Returns:
        A ``(paths, tokens)`` tuple. ``paths`` holds every non-ignored file
        that was found, including files that were later skipped because they
        could not be read as UTF-8 text. ``tokens`` is the sum of
        ``total_tokens`` over the files that were read.
    """
    root = Path(root)
    paths: list[Path] = []
    # Ignored directories only. Ignored files are deliberately left out: this
    # list is used to recognise descendants of ignored directories, and a file
    # can never be the parent of anything.
    ignored: list[Path] = []
    tokens = 0
    for current_working_dir, _, files in walk(root):
        cwd = Path(current_working_dir)
        if _is_in_ignored_dir(cwd, ignored):
            print(f"ignoring dir: {str(cwd)}")
            continue
        if _is_ignored(root, cwd, ignored):
            print(f"ignoring dir: {str(cwd)}")
            ignored.append(cwd)
            continue
        for file in files:
            path = cwd / file
            if _is_ignored(root, path, ignored):
                print(f"ignoring file: {str(path)}")
                continue
            paths.append(path)
            # Open and read inside the ``try`` so that one unreadable file
            # (broken symlink, permission error, binary content) is skipped
            # instead of aborting the whole run. The file is closed before
            # the network call below.
            try:
                with open(path, "r", encoding="utf-8") as f:
                    contents = f.read()
            except (OSError, UnicodeDecodeError):
                print(f"skipping unreadable file: {str(path)}")
                continue
            print(f"counting tokens: {str(path)}")
            response = gemini.models.count_tokens(
                model="gemini-2.5-flash", contents=contents
            )
            tokens += response.total_tokens
    return (paths, tokens)


def main():
    """Count tokens for the current directory and print a short summary.

    Prints a separator line of asterisks, then the number of collected files
    and the total token count returned by ``collect``.
    """
    found, total = collect(Path("."))
    separator = "*" * 80
    print(separator)
    print(f"file count: {len(found)}")
    print(f"tokens: {total}")


# Run the token count only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()