// generatedFiles.ts
  1. import { basename, extname, posix, sep } from 'path'
  2. /**
  3. * File patterns that should be excluded from attribution.
  4. * Based on GitHub Linguist vendored patterns and common generated file patterns.
  5. */
  6. // Exact file name matches (case-insensitive)
  7. const EXCLUDED_FILENAMES = new Set([
  8. 'package-lock.json',
  9. 'yarn.lock',
  10. 'pnpm-lock.yaml',
  11. 'bun.lockb',
  12. 'bun.lock',
  13. 'composer.lock',
  14. 'gemfile.lock',
  15. 'cargo.lock',
  16. 'poetry.lock',
  17. 'pipfile.lock',
  18. 'shrinkwrap.json',
  19. 'npm-shrinkwrap.json',
  20. ])
  21. // File extension patterns (case-insensitive)
  22. const EXCLUDED_EXTENSIONS = new Set([
  23. '.lock',
  24. '.min.js',
  25. '.min.css',
  26. '.min.html',
  27. '.bundle.js',
  28. '.bundle.css',
  29. '.generated.ts',
  30. '.generated.js',
  31. '.d.ts', // TypeScript declaration files
  32. ])
  33. // Directory patterns that indicate generated/vendored content
  34. const EXCLUDED_DIRECTORIES = [
  35. '/dist/',
  36. '/build/',
  37. '/out/',
  38. '/output/',
  39. '/node_modules/',
  40. '/vendor/',
  41. '/vendored/',
  42. '/third_party/',
  43. '/third-party/',
  44. '/external/',
  45. '/.next/',
  46. '/.nuxt/',
  47. '/.svelte-kit/',
  48. '/coverage/',
  49. '/__pycache__/',
  50. '/.tox/',
  51. '/venv/',
  52. '/.venv/',
  53. '/target/release/',
  54. '/target/debug/',
  55. ]
  56. // Filename patterns using regex for more complex matching
  57. const EXCLUDED_FILENAME_PATTERNS = [
  58. /^.*\.min\.[a-z]+$/i, // *.min.*
  59. /^.*-min\.[a-z]+$/i, // *-min.*
  60. /^.*\.bundle\.[a-z]+$/i, // *.bundle.*
  61. /^.*\.generated\.[a-z]+$/i, // *.generated.*
  62. /^.*\.gen\.[a-z]+$/i, // *.gen.*
  63. /^.*\.auto\.[a-z]+$/i, // *.auto.*
  64. /^.*_generated\.[a-z]+$/i, // *_generated.*
  65. /^.*_gen\.[a-z]+$/i, // *_gen.*
  66. /^.*\.pb\.(go|js|ts|py|rb)$/i, // Protocol buffer generated files
  67. /^.*_pb2?\.py$/i, // Python protobuf files
  68. /^.*\.pb\.h$/i, // C++ protobuf headers
  69. /^.*\.grpc\.[a-z]+$/i, // gRPC generated files
  70. /^.*\.swagger\.[a-z]+$/i, // Swagger generated files
  71. /^.*\.openapi\.[a-z]+$/i, // OpenAPI generated files
  72. ]
  73. /**
  74. * Check if a file should be excluded from attribution based on Linguist-style rules.
  75. *
  76. * @param filePath - Relative file path from repository root
  77. * @returns true if the file should be excluded from attribution
  78. */
  79. export function isGeneratedFile(filePath: string): boolean {
  80. // Normalize path separators for consistent pattern matching (patterns use posix-style /)
  81. const normalizedPath =
  82. posix.sep + filePath.split(sep).join(posix.sep).replace(/^\/+/, '')
  83. const fileName = basename(filePath).toLowerCase()
  84. const ext = extname(filePath).toLowerCase()
  85. // Check exact filename matches
  86. if (EXCLUDED_FILENAMES.has(fileName)) {
  87. return true
  88. }
  89. // Check extension matches
  90. if (EXCLUDED_EXTENSIONS.has(ext)) {
  91. return true
  92. }
  93. // Check for compound extensions like .min.js
  94. const parts = fileName.split('.')
  95. if (parts.length > 2) {
  96. const compoundExt = '.' + parts.slice(-2).join('.')
  97. if (EXCLUDED_EXTENSIONS.has(compoundExt)) {
  98. return true
  99. }
  100. }
  101. // Check directory patterns
  102. for (const dir of EXCLUDED_DIRECTORIES) {
  103. if (normalizedPath.includes(dir)) {
  104. return true
  105. }
  106. }
  107. // Check filename patterns
  108. for (const pattern of EXCLUDED_FILENAME_PATTERNS) {
  109. if (pattern.test(fileName)) {
  110. return true
  111. }
  112. }
  113. return false
  114. }
  115. /**
  116. * Filter a list of files to exclude generated files.
  117. *
  118. * @param files - Array of file paths
  119. * @returns Array of files that are not generated
  120. */
  121. export function filterGeneratedFiles(files: string[]): string[] {
  122. return files.filter(file => !isGeneratedFile(file))
  123. }