Reusable composite action that counts codebase tokens using tiktoken and generates a shields.io-style SVG badge. Color reflects context window usage: green (<30%), yellow-green (30-50%), yellow (50-70%), red (70%+). Badge includes hardcoded link back to repo-tokens. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
187 lines
6.4 KiB
YAML
187 lines
6.4 KiB
YAML
# Metadata for the composite action.
name: "Repo Tokens"
description: "Count codebase tokens with tiktoken and update a README badge"
|
|
|
|
# Every input is forwarded to the Python step as an INPUT_* environment variable.
inputs:
  # Split on whitespace by the script, then expanded with recursive globbing.
  include:
    description: "Glob patterns for files to count (space-separated)"
    required: true
  # Same format as `include`; matches are subtracted from the included set.
  exclude:
    description: "Glob patterns to exclude (space-separated)"
    required: false
    default: ""
  # Parsed with int() by the script; used as the denominator of the percentage.
  context-window:
    description: "Context window size for percentage calculation"
    required: false
    default: "200000"
  # File whose text between the marker comments is rewritten.
  readme:
    description: "Path to README file"
    required: false
    default: "README.md"
  # Passed to tiktoken.get_encoding().
  encoding:
    description: "Tiktoken encoding name"
    required: false
    default: "cl100k_base"
  # The script looks for <!-- NAME --> ... <!-- /NAME --> in the README.
  marker:
    description: "HTML comment marker name"
    required: false
    default: "token-count"
  # Whitespace-stripped by the script; an empty value skips SVG generation.
  badge-path:
    description: "Path to write SVG badge (empty = no SVG)"
    required: false
    default: ""
|
|
|
|
# Values re-exported from the step with id `count`, which appends them to
# the file named by GITHUB_OUTPUT.
outputs:
  tokens:
    description: "Total token count"
    value: ${{ steps.count.outputs.tokens }}
  percentage:
    description: "Percentage of context window"
    value: ${{ steps.count.outputs.percentage }}
  badge:
    description: "Badge text that was inserted"
    value: ${{ steps.count.outputs.badge }}
|
|
|
|
runs:
  using: composite
  steps:
    - name: Install tiktoken
      shell: bash
      # Install through `python -m pip` so the package lands in the same
      # interpreter that the `shell: python` step below invokes; a bare `pip`
      # on PATH may belong to a different Python installation.
      run: python -m pip install tiktoken

    - name: Count tokens and update README
      id: count
      shell: python
      # Inputs are passed through the environment rather than interpolated
      # into the script text, so their contents are never parsed as Python.
      env:
        INPUT_INCLUDE: ${{ inputs.include }}
        INPUT_EXCLUDE: ${{ inputs.exclude }}
        INPUT_CONTEXT_WINDOW: ${{ inputs.context-window }}
        INPUT_README: ${{ inputs.readme }}
        INPUT_ENCODING: ${{ inputs.encoding }}
        INPUT_MARKER: ${{ inputs.marker }}
        INPUT_BADGE_PATH: ${{ inputs.badge-path }}
      run: |
|
|
import glob, os, re, tiktoken
|
|
|
|
# Link target embedded in both the README badge text and the SVG badge.
REPO_TOKENS_URL = "https://github.com/qwibitai/nanoclaw/tree/main/repo-tokens"


def expand_globs(include_patterns, exclude_patterns):
    """Return the sorted regular files matched by include but not by exclude.

    Both pattern lists are expanded with recursive globbing. Every hit is
    passed through ``os.path.normpath`` before the set difference so that
    spellings such as ``./src/a.py`` and ``src/a.py`` compare equal;
    without this an exclude pattern written in a different style from the
    include pattern would silently exclude nothing.
    """
    def matches(patterns):
        return {
            os.path.normpath(hit)
            for pattern in patterns
            for hit in glob.glob(pattern, recursive=True)
        }

    candidates = matches(include_patterns) - matches(exclude_patterns)
    # Globs also match directories; only regular files can be tokenised.
    return sorted(path for path in candidates if os.path.isfile(path))


def count_tokens(paths, enc):
    """Return the total number of tokens ``enc`` produces for ``paths``.

    Files are decoded as UTF-8 with undecodable bytes dropped. Files that
    cannot be read or encoded are reported on stdout and skipped, so one
    bad file does not fail the whole run.
    """
    total = 0
    for path in paths:
        try:
            with open(path, "r", encoding="utf-8", errors="ignore") as f:
                text = f.read()
            # disallowed_special=() makes tiktoken treat special-token text
            # (e.g. "<|endoftext|>") as ordinary text instead of raising
            # ValueError, which previously caused such files to be skipped.
            total += len(enc.encode(text, disallowed_special=()))
        except (OSError, ValueError) as e:
            print(f"Skipping {path}: {e}")
    return total


def format_count(total):
    """Return a short human-readable token count such as ``999``, ``1.5k`` or ``123k``.

    Counts below 1000 are shown exactly, counts that would display as less
    than 100k get one decimal, and everything else is rounded to whole
    thousands.
    """
    if total < 1000:
        return str(total)
    # 99950..99999 would format as "100.0k" with one decimal; switch to the
    # whole-thousands form there so the output is consistent with 100000+.
    if total >= 99950:
        return f"{round(total / 1000)}k"
    return f"{total / 1000:.1f}k"


def badge_color(pct):
    """Return the badge fill colour for a context-window percentage.

    Below 30 is green, below 50 yellow-green, below 70 yellow, otherwise red.
    """
    if pct < 30:
        return "#4c1"
    if pct < 50:
        return "#97ca00"
    if pct < 70:
        return "#dfb317"
    return "#e05d44"


def render_readme(content, marker, badge):
    """Return ``content`` with the text between the marker comments replaced.

    The markers are ``<!-- marker -->`` and ``<!-- /marker -->`` (inner
    whitespace optional). Every marker pair found is rewritten to contain
    ``badge`` wrapped in a link to ``REPO_TOKENS_URL``. ``content`` is
    returned unchanged when no marker pair is present.
    """
    name = re.escape(marker)
    marker_re = re.compile(
        rf"(<!--\s*{name}\s*-->).*?(<!--\s*/{name}\s*-->)",
        re.DOTALL,
    )
    linked_badge = f'<a href="{REPO_TOKENS_URL}">{badge}</a>'
    # A function replacement inserts the badge literally; a template string
    # would interpret any backslash sequences in it as group references.
    return marker_re.sub(
        lambda m: f"{m.group(1)}{linked_badge}{m.group(2)}", content
    )


def build_svg(display, pct):
    """Return the SVG badge markup for ``display`` tokens at ``pct`` percent.

    The badge has a grey "tokens" label on the left and the formatted count
    on the right, coloured by ``badge_color(pct)``. Segment widths are
    estimated from the text length at a fixed width per character.
    """
    label_text = "tokens"
    value_text = display
    full_desc = f"{display} tokens, {pct}% of context window"

    cw = 7.0  # estimated width of one character, in SVG units
    label_w = round(len(label_text) * cw) + 10
    value_w = round(len(value_text) * cw) + 10
    total_w = label_w + value_w

    color = badge_color(pct)

    # Horizontal centres of the two segments (text-anchor is "middle").
    lx = label_w // 2
    vx = label_w + value_w // 2

    return "\n".join([
        f'<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="{total_w}" height="20" role="img" aria-label="{full_desc}">',
        f'<title>{full_desc}</title>',
        '<linearGradient id="s" x2="0" y2="100%">',
        '<stop offset="0" stop-color="#bbb" stop-opacity=".1"/>',
        '<stop offset="1" stop-opacity=".1"/>',
        '</linearGradient>',
        '<clipPath id="r">',
        f'<rect width="{total_w}" height="20" rx="3" fill="#fff"/>',
        '</clipPath>',
        f'<a xlink:href="{REPO_TOKENS_URL}">',
        '<g clip-path="url(#r)">',
        f'<rect width="{label_w}" height="20" fill="#555"/>',
        f'<rect x="{label_w}" width="{value_w}" height="20" fill="{color}"/>',
        f'<rect width="{total_w}" height="20" fill="url(#s)"/>',
        '<g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" font-size="11">',
        f'<text aria-hidden="true" x="{lx}" y="15" fill="#010101" fill-opacity=".3">{label_text}</text>',
        f'<text x="{lx}" y="14">{label_text}</text>',
        f'<text aria-hidden="true" x="{vx}" y="15" fill="#010101" fill-opacity=".3">{value_text}</text>',
        f'<text x="{vx}" y="14">{value_text}</text>',
        '</g>',
        '</g>',
        '</a>',
        '</svg>',
    ])


def write_outputs(output_path, total, pct, badge):
    """Append the ``tokens``, ``percentage`` and ``badge`` outputs to ``output_path``.

    The file is written as UTF-8 explicitly: the badge text contains a
    non-ASCII middle dot, and ``open()`` without an encoding would fall
    back to the platform's locale encoding.
    """
    with open(output_path, "a", encoding="utf-8") as f:
        f.write(f"tokens={total}\n")
        f.write(f"percentage={pct}\n")
        f.write(f"badge={badge}\n")


def main():
    """Count tokens, refresh the README badge, optionally write the SVG, set outputs.

    All configuration is read from the INPUT_* environment variables set by
    the action step; results are appended to the file named by GITHUB_OUTPUT.
    """
    include_patterns = os.environ["INPUT_INCLUDE"].split()
    exclude_patterns = os.environ["INPUT_EXCLUDE"].split()
    context_window = int(os.environ["INPUT_CONTEXT_WINDOW"])
    readme_path = os.environ["INPUT_README"]
    encoding_name = os.environ["INPUT_ENCODING"]
    marker = os.environ["INPUT_MARKER"]
    badge_path = os.environ.get("INPUT_BADGE_PATH", "").strip()

    # Fail early with a clear message instead of a ZeroDivisionError (or a
    # meaningless negative percentage) after all files have been counted.
    if context_window <= 0:
        raise SystemExit(
            f"context-window must be a positive integer, got {context_window}"
        )

    files = expand_globs(include_patterns, exclude_patterns)

    enc = tiktoken.get_encoding(encoding_name)
    total = count_tokens(files, enc)

    display = format_count(total)
    pct = round(total / context_window * 100)
    badge = f"{display} tokens \u00b7 {pct}% of context window"

    print(f"Files: {len(files)}, Tokens: {total}, Badge: {badge}")

    # Update README (text between markers)
    with open(readme_path, "r", encoding="utf-8") as f:
        content = f.read()

    new_content = render_readme(content, marker, badge)

    # Only touch the file when something changed, so an unchanged count
    # does not rewrite the README.
    if new_content != content:
        with open(readme_path, "w", encoding="utf-8") as f:
            f.write(new_content)
        print("README updated")
    else:
        print("No change to README")

    # Generate SVG badge
    if badge_path:
        os.makedirs(os.path.dirname(badge_path) or ".", exist_ok=True)
        with open(badge_path, "w", encoding="utf-8") as f:
            f.write(build_svg(display, pct))
        print(f"Badge SVG written to {badge_path}")

    # Set outputs
    write_outputs(os.environ["GITHUB_OUTPUT"], total, pct, badge)


# The step's `shell: python` runs this script as a file, so __name__ is
# "__main__" there; the guard only keeps imports of the helpers side-effect free.
if __name__ == "__main__":
    main()
|