# Regolith/repo-tokens/action.yml

# Composite action metadata: counts tokens in the selected files with
# tiktoken, rewrites the badge text between marker comments in a README,
# and can optionally write an SVG badge file.
name: Repo Tokens
description: Count codebase tokens with tiktoken and update a README badge

# Inputs are forwarded to the counting step as INPUT_* environment variables.
inputs:
  # Split on whitespace; each pattern is expanded with recursive globbing.
  include:
    description: Glob patterns for files to count (space-separated)
    required: true
  # Same format as `include`; matching files are removed from the set.
  exclude:
    description: Glob patterns to exclude (space-separated)
    required: false
    default: ''
  # Parsed as an integer; the badge shows tokens as a percentage of this.
  context-window:
    description: Context window size for percentage calculation
    required: false
    default: '200000'
  # File whose text between the marker comments is replaced by the badge.
  readme:
    description: Path to README file
    required: false
    default: 'README.md'
  # Passed to tiktoken.get_encoding().
  encoding:
    description: Tiktoken encoding name
    required: false
    default: 'cl100k_base'
  # The README must contain <!-- NAME --> ... <!-- /NAME --> for this NAME.
  marker:
    description: HTML comment marker name
    required: false
    default: 'token-count'
  # When non-empty, an SVG badge is written here (parent dirs are created).
  badge-path:
    description: Path to write SVG badge (empty = no SVG)
    required: false
    default: ''

# Every output is re-exported from the step with id `count`, which appends
# the values to the GITHUB_OUTPUT file.
outputs:
  # Raw (unabbreviated) token total across all counted files.
  tokens:
    description: Total token count
    value: ${{ steps.count.outputs.tokens }}
  # Rounded integer: tokens / context-window * 100.
  percentage:
    description: Percentage of context window
    value: ${{ steps.count.outputs.percentage }}
  # Plain badge text, without the surrounding <a> link.
  badge:
    description: Badge text that was inserted
    value: ${{ steps.count.outputs.badge }}

runs:
  using: composite
  steps:
    # Install through `python -m pip` rather than a bare `pip`: the counting
    # step uses `shell: python`, which invokes `python`, so this guarantees
    # tiktoken lands in the same interpreter that will import it.
    - name: Install tiktoken
      shell: bash
      run: python -m pip install tiktoken

    # Counts tokens in the selected files, rewrites the badge text between the
    # README marker comments, optionally writes an SVG badge, and publishes
    # the `tokens`, `percentage` and `badge` step outputs.
    - name: Count tokens and update README
      id: count
      shell: python
      env:
        # Inputs reach the script through the environment instead of being
        # interpolated into it, so their values are never parsed as Python.
        INPUT_INCLUDE: ${{ inputs.include }}
        INPUT_EXCLUDE: ${{ inputs.exclude }}
        INPUT_CONTEXT_WINDOW: ${{ inputs.context-window }}
        INPUT_README: ${{ inputs.readme }}
        INPUT_ENCODING: ${{ inputs.encoding }}
        INPUT_MARKER: ${{ inputs.marker }}
        INPUT_BADGE_PATH: ${{ inputs.badge-path }}
      run: |
        import glob, os, re, tiktoken

        include_patterns = os.environ["INPUT_INCLUDE"].split()
        # Optional inputs are read with .get so an unset variable behaves
        # like an empty value.
        exclude_patterns = os.environ.get("INPUT_EXCLUDE", "").split()
        context_window = int(os.environ["INPUT_CONTEXT_WINDOW"])
        readme_path = os.environ["INPUT_README"]
        encoding_name = os.environ["INPUT_ENCODING"]
        marker = os.environ["INPUT_MARKER"]
        badge_path = os.environ.get("INPUT_BADGE_PATH", "").strip()

        # Link target used by both the README badge and the SVG badge.
        repo_tokens_url = "https://github.com/qwibitai/nanoclaw/tree/main/repo-tokens"

        # The percentage divides by the window size; fail with a readable
        # message instead of a bare ZeroDivisionError.
        if context_window <= 0:
            raise SystemExit(
                f"context-window must be a positive integer, got {context_window}"
            )

        def expand(patterns):
            """Expand glob patterns into a set of normalized paths."""
            # normpath makes "./src/a.py" and "src/a.py" compare equal, so an
            # exclude pattern spelled differently from the include pattern
            # still removes the file.
            return {
                os.path.normpath(match)
                for pattern in patterns
                for match in glob.glob(pattern, recursive=True)
            }

        # Expand globs, drop excluded paths, keep regular files only
        candidates = expand(include_patterns) - expand(exclude_patterns)
        files = sorted(p for p in candidates if os.path.isfile(p))

        # Count tokens
        enc = tiktoken.get_encoding(encoding_name)
        total = 0
        for path in files:
            try:
                # Undecodable bytes are dropped so binary-ish files still count.
                with open(path, "r", encoding="utf-8", errors="ignore") as f:
                    text = f.read()
                # By default encode() raises when the text contains a special
                # token string such as "<|endoftext|>", which would skip the
                # whole file and undercount. disallowed_special=() encodes
                # those strings as ordinary text instead.
                total += len(enc.encode(text, disallowed_special=()))
            except Exception as e:
                # Best effort: an unreadable file must not fail the whole run.
                print(f"Skipping {path}: {e}")

        # Format: "123k" from 100k up, "12.3k" from 1k up, raw number below
        if total >= 100000:
            display = f"{round(total / 1000)}k"
        elif total >= 1000:
            display = f"{total / 1000:.1f}k"
        else:
            display = str(total)

        pct = round(total / context_window * 100)
        badge = f"{display} tokens \u00b7 {pct}% of context window"

        print(f"Files: {len(files)}, Tokens: {total}, Badge: {badge}")

        # Update README (text between markers). The marker name is escaped so
        # regex metacharacters in it are matched literally.
        marker_re = re.compile(
            rf"(<!--\s*{re.escape(marker)}\s*-->).*?(<!--\s*/{re.escape(marker)}\s*-->)",
            re.DOTALL,
        )

        with open(readme_path, "r", encoding="utf-8") as f:
            content = f.read()

        linked_badge = f'<a href="{repo_tokens_url}">{badge}</a>'
        # A function replacement inserts the badge literally; a template
        # string would interpret backslashes and group references in it.
        new_content, matches = marker_re.subn(
            lambda m: f"{m.group(1)}{linked_badge}{m.group(2)}", content
        )

        if matches == 0:
            # Distinguish "nothing to replace" from "already up to date".
            print(f"No '{marker}' marker pair found in {readme_path}")
        elif new_content != content:
            with open(readme_path, "w", encoding="utf-8") as f:
                f.write(new_content)
            print("README updated")
        else:
            print("No change to README")

        # Generate SVG badge
        if badge_path:
            label_text = "tokens"
            value_text = display
            full_desc = f"{display} tokens, {pct}% of context window"

            # Approximate glyph width in px, plus 10px padding per segment
            cw = 7.0
            label_w = round(len(label_text) * cw) + 10
            value_w = round(len(value_text) * cw) + 10
            total_w = label_w + value_w

            # Colour shifts from green to red as the context window fills up
            if pct < 30:
                color = "#4c1"
            elif pct < 50:
                color = "#97ca00"
            elif pct < 70:
                color = "#dfb317"
            else:
                color = "#e05d44"

            # Horizontal centres of the label and value segments
            lx = label_w // 2
            vx = label_w + value_w // 2

            svg = f'''<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="{total_w}" height="20" role="img" aria-label="{full_desc}">
          <title>{full_desc}</title>
          <linearGradient id="s" x2="0" y2="100%">
            <stop offset="0" stop-color="#bbb" stop-opacity=".1"/>
            <stop offset="1" stop-opacity=".1"/>
          </linearGradient>
          <clipPath id="r">
            <rect width="{total_w}" height="20" rx="3" fill="#fff"/>
          </clipPath>
          <a xlink:href="{repo_tokens_url}">
            <g clip-path="url(#r)">
              <rect width="{label_w}" height="20" fill="#555"/>
              <rect x="{label_w}" width="{value_w}" height="20" fill="{color}"/>
              <rect width="{total_w}" height="20" fill="url(#s)"/>
              <g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" font-size="11">
                <text aria-hidden="true" x="{lx}" y="15" fill="#010101" fill-opacity=".3">{label_text}</text>
                <text x="{lx}" y="14">{label_text}</text>
                <text aria-hidden="true" x="{vx}" y="15" fill="#010101" fill-opacity=".3">{value_text}</text>
                <text x="{vx}" y="14">{value_text}</text>
              </g>
            </g>
          </a>
        </svg>'''

            # dirname is "" for a bare filename; fall back to the current dir
            os.makedirs(os.path.dirname(badge_path) or ".", exist_ok=True)
            with open(badge_path, "w", encoding="utf-8") as f:
                f.write(svg)
            print(f"Badge SVG written to {badge_path}")

        # Set outputs. The badge text contains a non-ASCII character (U+00B7),
        # so write UTF-8 explicitly instead of the platform default encoding.
        with open(os.environ["GITHUB_OUTPUT"], "a", encoding="utf-8") as f:
            f.write(f"tokens={total}\n")
            f.write(f"percentage={pct}\n")
            f.write(f"badge={badge}\n")