# Action metadata for "Repo Tokens": a composite action whose steps install
# tiktoken, count tokens in the files matched by the `include` input, and
# rewrite a badge in the README.
name: Repo Tokens
description: Count codebase tokens with tiktoken and update a README badge
# Every input is handed to the counting step through an INPUT_* environment
# variable, so all values arrive in the script as strings.
inputs:
  # Split on whitespace, then each pattern is expanded with a recursive glob.
  include:
    description: 'Glob patterns for files to count (space-separated)'
    required: true

  # Expanded the same way as `include`; matching paths are removed from the set.
  exclude:
    description: 'Glob patterns to exclude (space-separated)'
    required: false
    default: ''

  # Parsed with int(); the token total is reported as a percentage of this.
  context-window:
    description: 'Context window size for percentage calculation'
    required: false
    default: '200000'

  # File that is read and, when the badge text changed, written back.
  readme:
    description: 'Path to README file'
    required: false
    default: 'README.md'

  # Passed to tiktoken.get_encoding().
  encoding:
    description: 'Tiktoken encoding name'
    required: false
    default: 'cl100k_base'

  marker:
    description: 'HTML comment marker name'
    required: false
    default: 'token-count'

  # Stripped of surrounding whitespace; an empty value skips SVG generation.
  badge-path:
    description: 'Path to write SVG badge (empty = no SVG)'
    required: false
    default: ''
# All three values are forwarded from the step with id `count`, which appends
# them to the file named by GITHUB_OUTPUT.
outputs:
  # Sum of token counts over every file that was read.
  tokens:
    description: 'Total token count'
    value: ${{ steps.count.outputs.tokens }}

  # tokens / context-window * 100, rounded to a whole number.
  percentage:
    description: 'Percentage of context window'
    value: ${{ steps.count.outputs.percentage }}

  # Human-readable text, e.g. "<count> tokens · <pct>% of context window".
  badge:
    description: 'Badge text that was inserted'
    value: ${{ steps.count.outputs.badge }}
runs:
  using: composite
  steps:
    # Install through `python -m pip` so the package lands in the same
    # interpreter that `shell: python` uses for the next step.
    - name: Install tiktoken
      shell: bash
      run: python -m pip install tiktoken

    # Inputs are passed via env (never interpolated into the script text) so
    # that arbitrary input values cannot inject Python code.
    - name: Count tokens and update README
      id: count
      shell: python
      env:
        INPUT_INCLUDE: ${{ inputs.include }}
        INPUT_EXCLUDE: ${{ inputs.exclude }}
        INPUT_CONTEXT_WINDOW: ${{ inputs.context-window }}
        INPUT_README: ${{ inputs.readme }}
        INPUT_ENCODING: ${{ inputs.encoding }}
        INPUT_MARKER: ${{ inputs.marker }}
        INPUT_BADGE_PATH: ${{ inputs.badge-path }}
      run: |
        # Count tokens in the selected files, rewrite the README region
        # between the marker comments, optionally write an SVG badge, and
        # publish tokens / percentage / badge as step outputs.
        import glob, os, re, tiktoken

        REPO_TOKENS_URL = "https://github.com/qwibitai/nanoclaw/tree/main/repo-tokens"

        include_patterns = os.environ["INPUT_INCLUDE"].split()
        exclude_patterns = os.environ["INPUT_EXCLUDE"].split()
        context_window = int(os.environ["INPUT_CONTEXT_WINDOW"])
        if context_window <= 0:
            # Fail with a clear message instead of a ZeroDivisionError below.
            raise SystemExit("context-window must be a positive integer")
        readme_path = os.environ["INPUT_README"]
        encoding_name = os.environ["INPUT_ENCODING"]
        marker = os.environ["INPUT_MARKER"]
        badge_path = os.environ.get("INPUT_BADGE_PATH", "").strip()


        def expand_globs(patterns):
            """Return the normalised set of paths matched by any pattern."""
            matched = set()
            for pattern in patterns:
                # normpath makes "./src/a.py" and "src/a.py" compare equal, so
                # an exclude pattern cancels an include written differently.
                matched.update(
                    os.path.normpath(p)
                    for p in glob.glob(pattern, recursive=True)
                )
            return matched


        # Expand globs
        candidates = expand_globs(include_patterns) - expand_globs(exclude_patterns)
        files = sorted(p for p in candidates if os.path.isfile(p))

        # Count tokens
        enc = tiktoken.get_encoding(encoding_name)
        total = 0
        for path in files:
            try:
                with open(path, "r", encoding="utf-8", errors="ignore") as f:
                    # disallowed_special=() encodes literals such as
                    # "<|endoftext|>" as plain text; by default encode() raises
                    # on them and the whole file would be dropped from the count.
                    total += len(enc.encode(f.read(), disallowed_special=()))
            except Exception as e:
                # Best effort: an unreadable file must not fail the workflow.
                print(f"Skipping {path}: {e}")

        # Format
        if total >= 99950:
            # 99,950 is where one-decimal formatting would round to "100.0k".
            display = f"{round(total / 1000)}k"
        elif total >= 1000:
            display = f"{total / 1000:.1f}k"
        else:
            display = str(total)
        pct = round(total / context_window * 100)
        badge = f"{display} tokens \u00b7 {pct}% of context window"
        print(f"Files: {len(files)}, Tokens: {total}, Badge: {badge}")

        # Update README (text between markers). The region is delimited by
        # HTML comments: <!-- NAME --> ... <!-- /NAME -->, where NAME is the
        # `marker` input.
        name_re = re.escape(marker)
        marker_re = re.compile(
            rf"(<!--\s*{name_re}\s*-->).*?(<!--\s*/{name_re}\s*-->)",
            re.DOTALL,
        )
        with open(readme_path, "r", encoding="utf-8") as f:
            content = f.read()
        linked_badge = f'<a href="{REPO_TOKENS_URL}">{badge}</a>'
        # A callable replacement keeps the badge text literal; in a template
        # string a digit right after "\1" would be read as part of the escape.
        new_content, regions = marker_re.subn(
            lambda m: m.group(1) + linked_badge + m.group(2),
            content,
        )
        if regions == 0:
            print(
                f"::warning::No <!-- {marker} --> ... <!-- /{marker} --> "
                f"region found in {readme_path}"
            )
        if new_content != content:
            with open(readme_path, "w", encoding="utf-8") as f:
                f.write(new_content)
            print("README updated")
        else:
            print("No change to README")

        # Generate SVG badge
        if badge_path:
            label_text = "tokens"
            value_text = display
            full_desc = f"{display} tokens, {pct}% of context window"
            # Approximate glyph width in px at font-size 11, plus 10px padding.
            cw = 7.0
            label_w = round(len(label_text) * cw) + 10
            value_w = round(len(value_text) * cw) + 10
            total_w = label_w + value_w
            # Green -> red as the codebase fills more of the context window.
            if pct < 30:
                color = "#4c1"
            elif pct < 50:
                color = "#97ca00"
            elif pct < 70:
                color = "#dfb317"
            else:
                color = "#e05d44"
            # Horizontal centres of the label and value halves.
            lx = label_w // 2
            vx = label_w + value_w // 2
            svg = f'''<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="{total_w}" height="20" role="img" aria-label="{full_desc}">
          <title>{full_desc}</title>
          <linearGradient id="s" x2="0" y2="100%">
            <stop offset="0" stop-color="#bbb" stop-opacity=".1"/>
            <stop offset="1" stop-opacity=".1"/>
          </linearGradient>
          <clipPath id="r">
            <rect width="{total_w}" height="20" rx="3" fill="#fff"/>
          </clipPath>
          <a xlink:href="{REPO_TOKENS_URL}">
            <g clip-path="url(#r)">
              <rect width="{label_w}" height="20" fill="#555"/>
              <rect x="{label_w}" width="{value_w}" height="20" fill="{color}"/>
              <rect width="{total_w}" height="20" fill="url(#s)"/>
            </g>
            <g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" font-size="11">
              <text aria-hidden="true" x="{lx}" y="15" fill="#010101" fill-opacity=".3">{label_text}</text>
              <text x="{lx}" y="14">{label_text}</text>
              <text aria-hidden="true" x="{vx}" y="15" fill="#010101" fill-opacity=".3">{value_text}</text>
              <text x="{vx}" y="14">{value_text}</text>
            </g>
          </a>
        </svg>
        '''
            os.makedirs(os.path.dirname(badge_path) or ".", exist_ok=True)
            with open(badge_path, "w", encoding="utf-8") as f:
                f.write(svg)
            print(f"Badge SVG written to {badge_path}")

        # Set outputs. UTF-8 is explicit because the badge text contains a
        # non-ASCII middle dot and the platform default encoding may differ.
        with open(os.environ["GITHUB_OUTPUT"], "a", encoding="utf-8") as f:
            f.write(f"tokens={total}\n")
            f.write(f"percentage={pct}\n")
            f.write(f"badge={badge}\n")