# Composite action: counts tokens in the files matched by `include` (minus
# `exclude`) with tiktoken, rewrites the badge text in the README, optionally
# writes an SVG badge, and exposes the results as step outputs.
name: Repo Tokens
description: Count codebase tokens with tiktoken and update a README badge

inputs:
  include:
    description: 'Glob patterns for files to count (space-separated)'
    required: true
  exclude:
    description: 'Glob patterns to exclude (space-separated)'
    required: false
    default: ''
  context-window:
    description: 'Context window size for percentage calculation'
    required: false
    default: '200000'
  readme:
    description: 'Path to README file'
    required: false
    default: 'README.md'
  encoding:
    description: 'Tiktoken encoding name'
    required: false
    default: 'cl100k_base'
  marker:
    description: 'HTML comment marker name'
    required: false
    default: 'token-count'
  badge-path:
    description: 'Path to write SVG badge (empty = no SVG)'
    required: false
    default: ''

outputs:
  tokens:
    description: 'Total token count'
    value: ${{ steps.count.outputs.tokens }}
  percentage:
    description: 'Percentage of context window'
    value: ${{ steps.count.outputs.percentage }}
  badge:
    description: 'Badge text that was inserted'
    value: ${{ steps.count.outputs.badge }}

runs:
  using: composite
  steps:
    - name: Install tiktoken
      shell: bash
      run: pip install tiktoken

    - name: Count tokens and update README
      id: count
      shell: python
      # Inputs are handed to the script through the environment rather than
      # being interpolated into the Python source.
      env:
        INPUT_INCLUDE: ${{ inputs.include }}
        INPUT_EXCLUDE: ${{ inputs.exclude }}
        INPUT_CONTEXT_WINDOW: ${{ inputs.context-window }}
        INPUT_README: ${{ inputs.readme }}
        INPUT_ENCODING: ${{ inputs.encoding }}
        INPUT_MARKER: ${{ inputs.marker }}
        INPUT_BADGE_PATH: ${{ inputs.badge-path }}
      run: |
        import glob, os, re, tiktoken

        include_patterns = os.environ["INPUT_INCLUDE"].split()
        exclude_patterns = os.environ["INPUT_EXCLUDE"].split()
        context_window = int(os.environ["INPUT_CONTEXT_WINDOW"])
        readme_path = os.environ["INPUT_README"]
        encoding_name = os.environ["INPUT_ENCODING"]
        # NOTE(review): `marker` is read but never referenced by the regex
        # below as it appears here -- confirm against the committed file.
        marker = os.environ["INPUT_MARKER"]
        badge_path = os.environ.get("INPUT_BADGE_PATH", "").strip()

        # The percentage divides by the context window, so reject
        # non-positive sizes with a readable message instead of a bare
        # ZeroDivisionError.
        if context_window <= 0:
            raise SystemExit("context-window must be a positive integer")


        def expand(patterns):
            """Expand glob patterns into a set of normalised paths.

            Paths are normalised so that "./src/a.py" from one pattern and
            "src/a.py" from another compare equal when excludes are
            subtracted from includes.
            """
            return {
                os.path.normpath(match)
                for pattern in patterns
                for match in glob.glob(pattern, recursive=True)
            }


        # Expand globs
        included = expand(include_patterns)
        excluded = expand(exclude_patterns)
        files = sorted(included - excluded)
        files = [f for f in files if os.path.isfile(f)]

        # Count tokens
        enc = tiktoken.get_encoding(encoding_name)
        total = 0
        for path in files:
            try:
                # errors="ignore" drops undecodable bytes so binary-ish
                # files do not abort the whole count.
                with open(path, "r", encoding="utf-8", errors="ignore") as f:
                    total += len(enc.encode(f.read()))
            except Exception as e:
                # Best effort: report the file and keep counting the rest.
                print(f"Skipping {path}: {e}")

        # Format: whole thousands from 100k up, one decimal from 1k up,
        # the raw count below that.
        if total >= 100000:
            display = f"{round(total / 1000)}k"
        elif total >= 1000:
            display = f"{total / 1000:.1f}k"
        else:
            display = str(total)

        pct = round(total / context_window * 100)
        badge = f"{display} tokens \u00b7 {pct}% of context window"

        print(f"Files: {len(files)}, Tokens: {total}, Badge: {badge}")

        # Update README (text between markers)
        # NOTE(review): both capture groups are empty as the pattern appears
        # here, so it matches at every position; the marker markup looks
        # like it was lost -- confirm against the committed file.
        marker_re = re.compile(
            rf"().*?()",
            re.DOTALL,
        )
        with open(readme_path, "r", encoding="utf-8") as f:
            content = f.read()

        repo_tokens_url = "https://github.com/qwibitai/nanoclaw/tree/main/repo-tokens"
        # NOTE(review): `repo_tokens_url` is not referenced by this literal
        # as it appears here -- confirm against the committed file.
        linked_badge = f'{badge}'

        # Use a function replacement instead of a "\1...\2" template: the
        # inserted text is then never parsed for escapes, so a badge that
        # starts with a digit cannot fuse with "\1" into an octal escape or
        # a different group number.
        new_content = marker_re.sub(
            lambda m: f"{m.group(1)}{linked_badge}{m.group(2)}",
            content,
        )

        if new_content != content:
            with open(readme_path, "w", encoding="utf-8") as f:
                f.write(new_content)
            print("README updated")
        else:
            print("No change to README")

        # Generate SVG badge
        if badge_path:
            label_text = "tokens"
            value_text = display
            full_desc = f"{display} tokens, {pct}% of context window"

            # Widths: 7.0 units per character plus 10 units of padding.
            cw = 7.0
            label_w = round(len(label_text) * cw) + 10
            value_w = round(len(value_text) * cw) + 10
            total_w = label_w + value_w

            # Colour is chosen from the percentage of the context window.
            if pct < 30:
                color = "#4c1"
            elif pct < 50:
                color = "#97ca00"
            elif pct < 70:
                color = "#dfb317"
            else:
                color = "#e05d44"

            # Horizontal midpoints of the label and value segments.
            lx = label_w // 2
            vx = label_w + value_w // 2

            # NOTE(review): total_w, color, lx, vx and repo_tokens_url are
            # computed but not referenced by this literal as it appears
            # here; the SVG markup looks like it was lost -- confirm against
            # the committed file before relying on the generated badge.
            svg = f''' {full_desc} {label_text} {value_text} '''

            os.makedirs(os.path.dirname(badge_path) or ".", exist_ok=True)
            with open(badge_path, "w", encoding="utf-8") as f:
                f.write(svg)
            print(f"Badge SVG written to {badge_path}")

        # Set outputs
        # The badge contains a non-ASCII character (U+00B7), so write the
        # output file as UTF-8 explicitly rather than relying on the
        # platform's default encoding.
        with open(os.environ["GITHUB_OUTPUT"], "a", encoding="utf-8") as f:
            f.write(f"tokens={total}\n")
            f.write(f"percentage={pct}\n")
            f.write(f"badge={badge}\n")