2025-04-19 19:45:42 +01:00
/ * *
* @license
* Copyright 2025 Google LLC
* SPDX - License - Identifier : Apache - 2.0
* /
import fs from 'fs' ;
import fsPromises from 'fs/promises' ;
import path from 'path' ;
import { EOL } from 'os' ;
import { spawn } from 'child_process' ;
2025-06-12 19:46:00 -07:00
import { globStream } from 'glob' ;
2025-04-19 19:45:42 +01:00
import { BaseTool , ToolResult } from './tools.js' ;
import { SchemaValidator } from '../utils/schemaValidator.js' ;
import { makeRelative , shortenPath } from '../utils/paths.js' ;
import { getErrorMessage , isNodeError } from '../utils/errors.js' ;
// --- Interfaces ---
/**
 * Input accepted by the grep tool.
 */
export interface GrepToolParams {
  /** Regular expression that file contents are matched against. */
  pattern: string;

  /**
   * Directory to search in. Optional; when omitted the search starts at the
   * tool's root directory.
   */
  path?: string;

  /**
   * Glob restricting which files are searched, e.g. "*.js" or "*.{ts,tsx}".
   * Optional.
   */
  include?: string;
}
/**
 * One matching line reported by a search.
 */
interface GrepMatch {
  /** Path of the file that contains the match. */
  filePath: string;
  /** 1-based number of the matching line within the file. */
  lineNumber: number;
  /** Text of the matching line. */
  line: string;
}
// --- GrepTool Class ---
/**
 * Tool that searches file contents for a regular expression beneath a fixed
 * root directory.
 *
 * Three strategies are tried in order and the first one that succeeds wins:
 * `git grep` (when the directory is inside a Git repository and `git` is
 * installed), the system `grep`, and finally a pure JavaScript scan driven by
 * `glob`. All three match case-insensitively.
 */
export class GrepTool extends BaseTool<GrepToolParams, ToolResult> {
  static readonly Name = 'search_file_content'; // Keep static name

  /**
   * Creates a new instance of the grep tool.
   * @param rootDirectory Root directory to ground this tool in. All
   *   operations are restricted to this directory.
   */
  constructor(private rootDirectory: string) {
    super(
      GrepTool.Name,
      'SearchText',
      'Searches for a regular expression pattern within the content of files in a specified directory (or current working directory). Can filter files by a glob pattern. Returns the lines containing matches, along with their file paths and line numbers.',
      {
        properties: {
          pattern: {
            description:
              "The regular expression (regex) pattern to search for within file contents (e.g., 'function\\s+myFunction', 'import\\s+\\{.*\\}\\s+from\\s+.*').",
            type: 'string',
          },
          path: {
            description:
              'Optional: The absolute path to the directory to search within. If omitted, searches the current working directory.',
            type: 'string',
          },
          include: {
            description:
              "Optional: A glob pattern to filter which files are searched (e.g., '*.js', '*.{ts,tsx}', 'src/**'). If omitted, searches all files (respecting potential global ignores).",
            type: 'string',
          },
        },
        required: ['pattern'],
        type: 'object',
      },
    );
    // Ensure rootDirectory is absolute and normalized
    this.rootDirectory = path.resolve(rootDirectory);
  }

  // --- Validation Methods ---

  /**
   * Resolves a path against the root directory and checks that it is an
   * existing directory inside that root.
   * @param relativePath Path relative to the root directory (or undefined
   *   for the root itself). Absolute paths are accepted as long as they
   *   resolve inside the root.
   * @returns The absolute path of the directory.
   * @throws Error if the path is outside the root, does not exist, cannot be
   *   inspected, or is not a directory.
   */
  private resolveAndValidatePath(relativePath?: string): string {
    const targetPath = path.resolve(this.rootDirectory, relativePath || '.');

    // Security check. A plain string-prefix test would accept siblings such
    // as "/root-other" for root "/root", so compare path segments instead.
    const relativeToRoot = path.relative(this.rootDirectory, targetPath);
    const escapesRoot =
      relativeToRoot === '..' ||
      relativeToRoot.startsWith(`..${path.sep}`) ||
      path.isAbsolute(relativeToRoot); // e.g. a different drive on Windows
    if (escapesRoot) {
      throw new Error(
        `Path validation failed: Attempted path "${relativePath || '.'}" resolves outside the allowed root directory "${this.rootDirectory}".`,
      );
    }

    // Check existence and type after resolving
    let isDirectory: boolean;
    try {
      isDirectory = fs.statSync(targetPath).isDirectory();
    } catch (error: unknown) {
      if (isNodeError(error) && error.code === 'ENOENT') {
        throw new Error(`Path does not exist: ${targetPath}`);
      }
      throw new Error(
        `Failed to access path stats for ${targetPath}: ${getErrorMessage(error)}`,
      );
    }
    // Raised outside the try block so it is not re-wrapped by the catch above.
    if (!isDirectory) {
      throw new Error(`Path is not a directory: ${targetPath}`);
    }
    return targetPath;
  }

  /**
   * Validates the parameters for the tool: schema, regex syntax, and search
   * path.
   * @param params Parameters to validate
   * @returns An error message string if invalid, null otherwise
   */
  validateToolParams(params: GrepToolParams): string | null {
    if (
      this.schema.parameters &&
      !SchemaValidator.validate(
        this.schema.parameters as Record<string, unknown>,
        params,
      )
    ) {
      return 'Parameters failed schema validation.';
    }

    try {
      new RegExp(params.pattern);
    } catch (error) {
      return `Invalid regular expression pattern provided: ${params.pattern}. Error: ${getErrorMessage(error)}`;
    }

    try {
      this.resolveAndValidatePath(params.path);
    } catch (error) {
      return getErrorMessage(error);
    }

    return null; // Parameters are valid
  }

  // --- Core Execution ---

  /**
   * Executes the grep search with the given parameters.
   *
   * Failures are reported through the returned result rather than thrown.
   * @param params Parameters for the grep search
   * @param signal Abort signal that cancels the search
   * @returns Result of the grep search, grouped by file and ordered by line
   *   number within each file
   */
  async execute(
    params: GrepToolParams,
    signal: AbortSignal,
  ): Promise<ToolResult> {
    const validationError = this.validateToolParams(params);
    if (validationError) {
      return {
        llmContent: `Error: Invalid parameters provided. Reason: ${validationError}`,
        returnDisplay: `Model provided invalid parameters. Error: ${validationError}`,
      };
    }

    try {
      const searchDirAbs = this.resolveAndValidatePath(params.path);
      const searchDirDisplay = params.path || '.';
      const filterNote = params.include
        ? ` (filter: "${params.include}")`
        : '';

      const matches = await this.performGrepSearch({
        pattern: params.pattern,
        path: searchDirAbs,
        include: params.include,
        signal,
      });

      if (matches.length === 0) {
        const noMatchMsg = `No matches found for pattern "${params.pattern}" in path "${searchDirDisplay}"${filterNote}.`;
        return { llmContent: noMatchMsg, returnDisplay: `No matches found` };
      }

      // Group matches per file, keeping files in first-seen order.
      const matchesByFile = new Map<string, GrepMatch[]>();
      for (const match of matches) {
        const relativeFilePath =
          path.relative(
            searchDirAbs,
            path.resolve(searchDirAbs, match.filePath),
          ) || path.basename(match.filePath);
        const bucket = matchesByFile.get(relativeFilePath);
        if (bucket) {
          bucket.push(match);
        } else {
          matchesByFile.set(relativeFilePath, [match]);
        }
      }

      let llmContent = `Found ${matches.length} match(es) for pattern "${params.pattern}" in path "${searchDirDisplay}"${filterNote}:\n---\n`;
      for (const [filePath, fileMatches] of matchesByFile) {
        // Sort once per file rather than after every insertion.
        fileMatches.sort((a, b) => a.lineNumber - b.lineNumber);
        llmContent += `File: ${filePath}\n`;
        for (const match of fileMatches) {
          llmContent += `L${match.lineNumber}: ${match.line.trim()}\n`;
        }
        llmContent += '---\n';
      }

      return {
        llmContent: llmContent.trim(),
        returnDisplay: `Found ${matches.length} match(es)`,
      };
    } catch (error) {
      console.error(`Error during GrepLogic execution: ${error}`);
      const errorMessage = getErrorMessage(error);
      return {
        llmContent: `Error during grep search operation: ${errorMessage}`,
        returnDisplay: `Error: ${errorMessage}`,
      };
    }
  }

  // --- Grep Implementation Logic ---

  /**
   * Checks if a command is available in the system's PATH.
   * @param command The command name (e.g., 'git', 'grep').
   * @returns True if the lookup exits with status 0, false otherwise
   *   (including when the lookup itself cannot be started).
   */
  private isCommandAvailable(command: string): Promise<boolean> {
    return new Promise((resolve) => {
      try {
        // `command` is a shell builtin, so on POSIX it has to run through a
        // shell. The name is handed over as a positional parameter ("$1")
        // so it is never interpreted as shell syntax.
        const child =
          process.platform === 'win32'
            ? spawn('where', [command], { stdio: 'ignore', shell: true })
            : spawn('sh', ['-c', 'command -v "$1"', 'sh', command], {
                stdio: 'ignore',
              });
        child.on('close', (code) => resolve(code === 0));
        child.on('error', () => resolve(false));
      } catch {
        resolve(false);
      }
    });
  }

  /**
   * Checks if a directory or one of its parent directories contains a `.git`
   * entry (a directory, or a file as used by worktrees and submodules).
   * @param dirPath Absolute path to the directory to check.
   * @returns True if such an entry is found, false otherwise.
   */
  private async isGitRepository(dirPath: string): Promise<boolean> {
    let currentPath = path.resolve(dirPath);
    while (true) {
      try {
        const stats = await fsPromises.stat(path.join(currentPath, '.git'));
        // If .git exists but isn't a file/dir, something is weird: not a repo.
        return stats.isDirectory() || stats.isFile();
      } catch (error: unknown) {
        if (!isNodeError(error) || error.code !== 'ENOENT') {
          console.debug(
            `Error checking for .git in ${currentPath}: ${error}`,
          );
          return false;
        }
        // ENOENT: no .git at this level, keep walking up.
      }

      const parentPath = path.dirname(currentPath);
      if (parentPath === currentPath) {
        return false; // Reached the filesystem root.
      }
      currentPath = parentPath;
    }
  }

  /**
   * Parses the standard output of grep-like commands (git grep, system grep).
   * Expects one `filePath:lineNumber:lineContent` record per line. The record
   * is split at the first two colons, so colons inside the line content are
   * preserved; file paths that themselves contain a colon are not supported.
   * @param output The raw stdout string.
   * @param basePath The absolute directory the search was run from, used to
   *   normalize the reported file paths.
   * @returns Array of match objects; malformed lines are skipped.
   */
  private parseGrepOutput(output: string, basePath: string): GrepMatch[] {
    const results: GrepMatch[] = [];
    if (!output) return results;

    // git and grep terminate records with "\n" on every platform, so do not
    // rely on the OS-specific EOL here.
    for (const line of output.split(/\r?\n/)) {
      if (!line.trim()) continue;

      const firstColonIndex = line.indexOf(':');
      if (firstColonIndex === -1) continue; // Malformed
      const secondColonIndex = line.indexOf(':', firstColonIndex + 1);
      if (secondColonIndex === -1) continue; // Malformed

      const lineNumber = parseInt(
        line.substring(firstColonIndex + 1, secondColonIndex),
        10,
      );
      if (isNaN(lineNumber)) continue;

      const absoluteFilePath = path.resolve(
        basePath,
        line.substring(0, firstColonIndex),
      );
      results.push({
        filePath:
          path.relative(basePath, absoluteFilePath) ||
          path.basename(absoluteFilePath),
        lineNumber,
        line: line.substring(secondColonIndex + 1),
      });
    }
    return results;
  }

  /**
   * Gets a description of the grep operation
   * @param params Parameters for the grep operation
   * @returns A string describing the grep
   */
  getDescription(params: GrepToolParams): string {
    let description = `'${params.pattern}'`;
    if (params.include) {
      description += ` in ${params.include}`;
    }
    if (params.path) {
      const resolvedPath = path.resolve(this.rootDirectory, params.path);
      if (resolvedPath === this.rootDirectory || params.path === '.') {
        description += ` within ./`;
      } else {
        const relativePath = makeRelative(resolvedPath, this.rootDirectory);
        description += ` within ${shortenPath(relativePath)}`;
      }
    }
    return description;
  }

  /**
   * Runs a child process to completion and collects its output.
   * @param command Executable to run.
   * @param args Arguments passed to the executable (no shell is involved).
   * @param options `cwd` is the working directory; `signal` aborts the child;
   *   `label` names the process in start-failure messages; `keepStderr`, when
   *   given, decides per stderr chunk whether it is retained.
   * @returns The exit code together with decoded stdout and stderr.
   * @throws Error (via rejection) if the process cannot be started or is
   *   aborted through `signal`.
   */
  private runProcess(
    command: string,
    args: string[],
    options: {
      cwd: string;
      signal: AbortSignal;
      label: string;
      keepStderr?: (text: string) => boolean;
    },
  ): Promise<{ code: number | null; stdout: string; stderr: string }> {
    const { cwd, signal, label, keepStderr } = options;
    return new Promise((resolve, reject) => {
      const child = spawn(command, args, { cwd, signal, windowsHide: true });
      const stdoutChunks: Buffer[] = [];
      const stderrChunks: Buffer[] = [];

      child.stdout.on('data', (chunk: Buffer) => {
        stdoutChunks.push(chunk);
      });
      child.stderr.on('data', (chunk: Buffer) => {
        if (!keepStderr || keepStderr(chunk.toString())) {
          stderrChunks.push(chunk);
        }
      });
      child.on('error', (err) =>
        reject(new Error(`Failed to start ${label}: ${err.message}`)),
      );
      child.on('close', (code) =>
        resolve({
          code,
          stdout: Buffer.concat(stdoutChunks).toString('utf8'),
          stderr: Buffer.concat(stderrChunks).toString('utf8'),
        }),
      );
    });
  }

  /**
   * Performs the actual search using the prioritized strategies.
   * @param options Search options including pattern, absolute path, include
   *   glob, and the abort signal.
   * @returns A promise resolving to an array of match objects.
   * @throws If the search is aborted, or if the JavaScript fallback fails.
   */
  private async performGrepSearch(options: {
    pattern: string;
    path: string; // Expects absolute path
    include?: string;
    signal: AbortSignal;
  }): Promise<GrepMatch[]> {
    const { pattern, path: absolutePath, include, signal } = options;
    let strategyUsed = 'none';

    try {
      // --- Strategy 1: git grep ---
      const isGit = await this.isGitRepository(absolutePath);
      const gitAvailable = isGit && (await this.isCommandAvailable('git'));

      if (gitAvailable) {
        strategyUsed = 'git grep';
        const gitArgs = [
          'grep',
          '--untracked',
          '-n',
          '-E',
          '--ignore-case',
          // -e keeps a pattern that starts with "-" from being parsed as an
          // option (e.g. --open-files-in-pager).
          '-e',
          pattern,
        ];
        if (include) {
          gitArgs.push('--', include);
        }

        try {
          const { code, stdout, stderr } = await this.runProcess(
            'git',
            gitArgs,
            { cwd: absolutePath, signal, label: 'git grep' },
          );
          if (code === 0) return this.parseGrepOutput(stdout, absolutePath);
          if (code === 1) return []; // No matches
          throw new Error(`git grep exited with code ${code}: ${stderr}`);
        } catch (gitError: unknown) {
          // A cancelled search must not fall through to the next strategy.
          if (signal.aborted) throw gitError;
          console.debug(
            `GrepLogic: git grep failed: ${getErrorMessage(gitError)}. Falling back...`,
          );
        }
      }

      // --- Strategy 2: System grep ---
      const grepAvailable = await this.isCommandAvailable('grep');
      if (grepAvailable) {
        strategyUsed = 'system grep';
        // -i keeps this strategy consistent with git grep (--ignore-case)
        // and the JavaScript fallback ('i' flag).
        const grepArgs = ['-r', '-n', '-H', '-E', '-i'];
        for (const dir of ['.git', 'node_modules', 'bower_components']) {
          grepArgs.push(`--exclude-dir=${dir}`);
        }
        if (include) {
          grepArgs.push(`--include=${include}`);
        }
        // -e keeps a pattern that starts with "-" from being parsed as an
        // option.
        grepArgs.push('-e', pattern, '.');

        try {
          const { code, stdout, stderr } = await this.runProcess(
            'grep',
            grepArgs,
            {
              cwd: absolutePath,
              signal,
              label: 'system grep',
              // Suppress common harmless stderr messages
              keepStderr: (text) =>
                !text.includes('Permission denied') &&
                !/grep:.*: Is a directory/i.test(text),
            },
          );
          if (code === 0) return this.parseGrepOutput(stdout, absolutePath);
          if (code === 1) return []; // No matches

          const stderrData = stderr.trim();
          if (stderrData) {
            throw new Error(
              `System grep exited with code ${code}: ${stderrData}`,
            );
          }
          // Exit code > 1 with only suppressed errors (e.g. unreadable
          // files): matches from readable files are still on stdout.
          return this.parseGrepOutput(stdout, absolutePath);
        } catch (grepError: unknown) {
          // A cancelled search must not fall through to the next strategy.
          if (signal.aborted) throw grepError;
          console.debug(
            `GrepLogic: System grep failed: ${getErrorMessage(grepError)}. Falling back...`,
          );
        }
      }

      // --- Strategy 3: Pure JavaScript Fallback ---
      console.debug(
        'GrepLogic: Falling back to JavaScript grep implementation.',
      );
      strategyUsed = 'javascript fallback';
      const globPattern = include ? include : '**/*';
      const ignorePatterns = [
        '.git/**',
        'node_modules/**',
        'bower_components/**',
        '.svn/**',
        '.hg/**',
      ]; // Use glob patterns for ignores here

      const filesStream = globStream(globPattern, {
        cwd: absolutePath,
        dot: true,
        ignore: ignorePatterns,
        absolute: true,
        nodir: true,
        signal,
      });

      const regex = new RegExp(pattern, 'i');
      const allMatches: GrepMatch[] = [];

      for await (const filePath of filesStream) {
        const fileAbsolutePath = filePath as string;
        try {
          const content = await fsPromises.readFile(fileAbsolutePath, 'utf8');
          const lines = content.split(/\r?\n/);
          lines.forEach((line, index) => {
            if (regex.test(line)) {
              allMatches.push({
                filePath:
                  path.relative(absolutePath, fileAbsolutePath) ||
                  path.basename(fileAbsolutePath),
                lineNumber: index + 1,
                line,
              });
            }
          });
        } catch (readError: unknown) {
          // Unreadable files are skipped; only a file that vanished
          // mid-scan (ENOENT) is skipped without a debug message.
          if (!isNodeError(readError) || readError.code !== 'ENOENT') {
            console.debug(
              `GrepLogic: Could not read/process ${fileAbsolutePath}: ${getErrorMessage(readError)}`,
            );
          }
        }
      }

      return allMatches;
    } catch (error: unknown) {
      console.error(
        `GrepLogic: Error in performGrepSearch (Strategy: ${strategyUsed}): ${getErrorMessage(error)}`,
      );
      throw error; // Re-throw
    }
  }
}