| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136 |
- import { basename, extname, posix, sep } from 'path'
- /**
- * File patterns that should be excluded from attribution.
- * Based on GitHub Linguist vendored patterns and common generated file patterns.
- */
// Base names that are always treated as generated, whatever directory they
// live in. Entries are stored in lower case so they can be compared against a
// lower-cased file name (i.e. the match is case-insensitive).
const EXCLUDED_FILENAMES = new Set<string>([
  // JavaScript package-manager lockfiles
  'package-lock.json',
  'yarn.lock',
  'pnpm-lock.yaml',
  'bun.lockb',
  'bun.lock',
  // Lockfiles from other ecosystems
  'composer.lock',
  'gemfile.lock',
  'cargo.lock',
  'poetry.lock',
  'pipfile.lock',
  // npm shrinkwrap files
  'shrinkwrap.json',
  'npm-shrinkwrap.json',
])
// Lower-case file-name suffixes that mark a file as generated (matched
// case-insensitively by the caller). Only single-segment entries such as
// '.lock' can equal the result of `extname()`; the two-segment entries are
// matched against the last two dot-separated parts of the file name.
const EXCLUDED_EXTENSIONS = new Set<string>([
  // Any lockfile
  '.lock',
  // Minified assets
  '.min.js',
  '.min.css',
  '.min.html',
  // Bundler output
  '.bundle.js',
  '.bundle.css',
  // Explicitly generated sources
  '.generated.ts',
  '.generated.js',
  // TypeScript declaration files
  '.d.ts',
])
// Path fragments that indicate generated or vendored content. Each fragment is
// wrapped in '/' so it only matches a complete directory segment of a
// posix-style path that starts with '/'. Matching is a plain, case-sensitive
// substring test.
const EXCLUDED_DIRECTORIES: string[] = [
  // Build output
  '/dist/',
  '/build/',
  '/out/',
  '/output/',
  // Third-party / vendored code
  '/node_modules/',
  '/vendor/',
  '/vendored/',
  '/third_party/',
  '/third-party/',
  '/external/',
  // Framework build caches
  '/.next/',
  '/.nuxt/',
  '/.svelte-kit/',
  // Test coverage reports
  '/coverage/',
  // Python caches and virtual environments
  '/__pycache__/',
  '/.tox/',
  '/venv/',
  '/.venv/',
  // Build artifacts under target/
  '/target/release/',
  '/target/debug/',
]
// Regular expressions applied to the base name of a file for naming
// conventions that a fixed suffix list cannot express. All patterns are
// anchored at both ends and case-insensitive.
const EXCLUDED_FILENAME_PATTERNS: RegExp[] = [
  // Minified output: *.min.* and *-min.*
  /^.*\.min\.[a-z]+$/i,
  /^.*-min\.[a-z]+$/i,
  // Bundles: *.bundle.*
  /^.*\.bundle\.[a-z]+$/i,
  // Generated-code markers: *.generated.*, *.gen.*, *.auto.*, *_generated.*, *_gen.*
  /^.*\.generated\.[a-z]+$/i,
  /^.*\.gen\.[a-z]+$/i,
  /^.*\.auto\.[a-z]+$/i,
  /^.*_generated\.[a-z]+$/i,
  /^.*_gen\.[a-z]+$/i,
  // Protocol buffer output (Go/JS/TS/Python/Ruby, Python *_pb.py / *_pb2.py, C++ headers)
  /^.*\.pb\.(go|js|ts|py|rb)$/i,
  /^.*_pb2?\.py$/i,
  /^.*\.pb\.h$/i,
  // gRPC, Swagger and OpenAPI generated files
  /^.*\.grpc\.[a-z]+$/i,
  /^.*\.swagger\.[a-z]+$/i,
  /^.*\.openapi\.[a-z]+$/i,
]
/**
 * Check if a file should be excluded from attribution based on Linguist-style rules.
 *
 * A file is excluded when any of the following holds:
 * - its lower-cased base name is listed in EXCLUDED_FILENAMES;
 * - its extension, or its last two dot-separated name segments (e.g.
 *   `.min.js`), is listed in EXCLUDED_EXTENSIONS;
 * - its path contains one of the EXCLUDED_DIRECTORIES segments;
 * - its lower-cased base name matches one of EXCLUDED_FILENAME_PATTERNS.
 *
 * Both `/` and `\` are treated as path separators regardless of the host
 * platform, so a Windows-style path is classified the same way when it is
 * processed on a POSIX host.
 *
 * @param filePath - Relative file path from repository root
 * @returns true if the file should be excluded from attribution
 */
export function isGeneratedFile(filePath: string): boolean {
  // Normalize to posix-style separators (the directory patterns use '/').
  // Splitting on `sep` only covers the host platform's separator, and on POSIX
  // `basename()` does not treat '\' as a separator, so backslashes are
  // converted explicitly as well.
  const posixPath = filePath
    .split(sep)
    .join(posix.sep)
    .replace(/\\/g, posix.sep)

  // Exactly one leading '/' so that a top-level directory such as 'dist/...'
  // still matches the '/dist/' pattern.
  const normalizedPath = posix.sep + posixPath.replace(/^\/+/, '')
  const fileName = basename(posixPath).toLowerCase()
  const ext = extname(posixPath).toLowerCase()

  // Check exact filename matches
  if (EXCLUDED_FILENAMES.has(fileName)) {
    return true
  }

  // Check extension matches (single-segment extensions such as '.lock')
  if (EXCLUDED_EXTENSIONS.has(ext)) {
    return true
  }

  // Check for compound extensions like .min.js, which `extname()` cannot
  // return because it only yields the final segment.
  const parts = fileName.split('.')
  if (parts.length > 2) {
    const compoundExt = '.' + parts.slice(-2).join('.')
    if (EXCLUDED_EXTENSIONS.has(compoundExt)) {
      return true
    }
  }

  // Check directory patterns
  if (EXCLUDED_DIRECTORIES.some(dir => normalizedPath.includes(dir))) {
    return true
  }

  // Check filename patterns
  return EXCLUDED_FILENAME_PATTERNS.some(pattern => pattern.test(fileName))
}
/**
 * Drop every generated file from a list of paths.
 *
 * The input array is left untouched; the surviving paths are returned in a new
 * array in their original order.
 *
 * @param files - Array of file paths
 * @returns Array of files that are not generated
 */
export function filterGeneratedFiles(files: string[]): string[] {
  // A path is kept exactly when isGeneratedFile() rejects it.
  const isNotGenerated = (candidate: string): boolean =>
    !isGeneratedFile(candidate)
  return files.filter(isNotGenerated)
}
|