qwen-code/packages/core/src/tools/read-many-files.test.ts

644 lines
24 KiB
TypeScript
Raw Normal View History

2025-05-20 13:02:41 -07:00
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { vi, describe, it, expect, beforeEach, afterEach } from 'vitest';
2025-05-20 13:02:41 -07:00
import type { Mock } from 'vitest';
import { mockControl } from '../__mocks__/fs/promises.js';
import { ReadManyFilesTool } from './read-many-files.js';
Ignore folders files (#651) # Add .gitignore-Aware File Filtering to gemini-cli This pull request introduces .gitignore-based file filtering to the gemini-cli, ensuring that git-ignored files are automatically excluded from file-related operations and suggestions throughout the CLI. The update enhances usability, reduces noise from build artifacts and dependencies, and provides new configuration options for fine-tuning file discovery. Key Improvements .gitignore File Filtering All @ (at) commands, file completions, and core discovery tools now honor .gitignore patterns by default. Git-ignored files (such as node_modules/, dist/, .env, and .git) are excluded from results unless explicitly overridden. The behavior can be customized via a new fileFiltering section in settings.json, including options for: Turning .gitignore respect on/off. Adding custom ignore patterns. Allowing or excluding build artifacts. Configuration & Documentation Updates settings.json schema extended with fileFiltering options. Documentation updated to explain new filtering controls and usage patterns. Testing New and updated integration/unit tests for file filtering logic, configuration merging, and edge cases. Test coverage ensures .gitignore filtering works as intended across different workflows. Internal Refactoring Core file discovery logic refactored for maintainability and extensibility. Underlying tools (ls, glob, read-many-files) now support git-aware filtering out of the box. Co-authored-by: N. Taylor Mullen <ntaylormullen@google.com>
2025-06-03 21:40:46 -07:00
import { FileDiscoveryService } from '../services/fileDiscoveryService.js';
2025-05-20 13:02:41 -07:00
import path from 'path';
import fs from 'fs'; // Actual fs for setup
import os from 'os';
import { Config } from '../config/config.js';
import { WorkspaceContext } from '../utils/workspaceContext.js';
2025-05-20 13:02:41 -07:00
vi.mock('mime-types', () => {
const lookup = (filename: string) => {
if (filename.endsWith('.ts') || filename.endsWith('.js')) {
return 'text/plain';
}
if (filename.endsWith('.png')) {
return 'image/png';
}
if (filename.endsWith('.pdf')) {
return 'application/pdf';
}
if (filename.endsWith('.mp3') || filename.endsWith('.wav')) {
return 'audio/mpeg';
}
if (filename.endsWith('.mp4') || filename.endsWith('.mov')) {
return 'video/mp4';
}
return false;
};
return {
default: {
lookup,
},
lookup,
};
});
2025-05-20 13:02:41 -07:00
describe('ReadManyFilesTool', () => {
let tool: ReadManyFilesTool;
let tempRootDir: string;
let tempDirOutsideRoot: string;
let mockReadFileFn: Mock;
beforeEach(async () => {
tempRootDir = fs.realpathSync(
fs.mkdtempSync(path.join(os.tmpdir(), 'read-many-files-root-')),
2025-05-20 13:02:41 -07:00
);
tempDirOutsideRoot = fs.realpathSync(
fs.mkdtempSync(path.join(os.tmpdir(), 'read-many-files-external-')),
2025-05-20 13:02:41 -07:00
);
fs.writeFileSync(path.join(tempRootDir, '.geminiignore'), 'foo.*');
const fileService = new FileDiscoveryService(tempRootDir);
const mockConfig = {
getFileService: () => fileService,
getFileFilteringOptions: () => ({
respectGitIgnore: true,
respectGeminiIgnore: true,
}),
getTargetDir: () => tempRootDir,
getWorkspaceDirs: () => [tempRootDir],
getWorkspaceContext: () => new WorkspaceContext(tempRootDir),
} as Partial<Config> as Config;
tool = new ReadManyFilesTool(mockConfig);
2025-05-20 13:02:41 -07:00
mockReadFileFn = mockControl.mockReadFile;
mockReadFileFn.mockReset();
mockReadFileFn.mockImplementation(
async (filePath: fs.PathLike, options?: Record<string, unknown>) => {
const fp =
typeof filePath === 'string'
? filePath
: (filePath as Buffer).toString();
if (fs.existsSync(fp)) {
const originalFs = await vi.importActual<typeof fs>('fs');
return originalFs.promises.readFile(fp, options);
}
if (fp.endsWith('nonexistent-file.txt')) {
const err = new Error(
`ENOENT: no such file or directory, open '${fp}'`,
);
(err as NodeJS.ErrnoException).code = 'ENOENT';
throw err;
}
if (fp.endsWith('unreadable.txt')) {
const err = new Error(`EACCES: permission denied, open '${fp}'`);
(err as NodeJS.ErrnoException).code = 'EACCES';
throw err;
}
if (fp.endsWith('.png'))
return Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a]); // PNG header
if (fp.endsWith('.pdf')) return Buffer.from('%PDF-1.4...'); // PDF start
if (fp.endsWith('binary.bin'))
return Buffer.from([0x00, 0x01, 0x02, 0x00, 0x03]);
const err = new Error(
`ENOENT: no such file or directory, open '${fp}' (unmocked path)`,
);
(err as NodeJS.ErrnoException).code = 'ENOENT';
throw err;
},
);
});
afterEach(() => {
if (fs.existsSync(tempRootDir)) {
fs.rmSync(tempRootDir, { recursive: true, force: true });
}
if (fs.existsSync(tempDirOutsideRoot)) {
fs.rmSync(tempDirOutsideRoot, { recursive: true, force: true });
}
});
describe('validateParams', () => {
it('should return null for valid relative paths within root', () => {
const params = { paths: ['file1.txt', 'subdir/file2.txt'] };
expect(tool.validateParams(params)).toBeNull();
});
it('should return null for valid glob patterns within root', () => {
const params = { paths: ['*.txt', 'subdir/**/*.js'] };
expect(tool.validateParams(params)).toBeNull();
});
it('should return null for paths trying to escape the root (e.g., ../) as execute handles this', () => {
const params = { paths: ['../outside.txt'] };
expect(tool.validateParams(params)).toBeNull();
});
it('should return null for absolute paths as execute handles this', () => {
const params = { paths: [path.join(tempDirOutsideRoot, 'absolute.txt')] };
expect(tool.validateParams(params)).toBeNull();
});
it('should return error if paths array is empty', () => {
const params = { paths: [] };
expect(tool.validateParams(params)).toBe(
'params/paths must NOT have fewer than 1 items',
2025-05-20 13:02:41 -07:00
);
});
it('should return null for valid exclude and include patterns', () => {
const params = {
paths: ['src/**/*.ts'],
exclude: ['**/*.test.ts'],
include: ['src/utils/*.ts'],
};
expect(tool.validateParams(params)).toBeNull();
});
it('should return error if paths array contains an empty string', () => {
const params = { paths: ['file1.txt', ''] };
expect(tool.validateParams(params)).toBe(
'params/paths/1 must NOT have fewer than 1 characters',
);
});
it('should return error if include array contains non-string elements', () => {
const params = {
paths: ['file1.txt'],
include: ['*.ts', 123] as string[],
};
expect(tool.validateParams(params)).toBe(
'params/include/1 must be string',
);
});
it('should return error if exclude array contains non-string elements', () => {
const params = {
paths: ['file1.txt'],
exclude: ['*.log', {}] as string[],
};
expect(tool.validateParams(params)).toBe(
'params/exclude/1 must be string',
);
});
2025-05-20 13:02:41 -07:00
});
describe('execute', () => {
const createFile = (filePath: string, content = '') => {
const fullPath = path.join(tempRootDir, filePath);
fs.mkdirSync(path.dirname(fullPath), { recursive: true });
fs.writeFileSync(fullPath, content);
};
const createBinaryFile = (filePath: string, data: Uint8Array) => {
const fullPath = path.join(tempRootDir, filePath);
fs.mkdirSync(path.dirname(fullPath), { recursive: true });
fs.writeFileSync(fullPath, data);
};
it('should read a single specified file', async () => {
createFile('file1.txt', 'Content of file1');
const params = { paths: ['file1.txt'] };
const result = await tool.execute(params, new AbortController().signal);
const expectedPath = path.join(tempRootDir, 'file1.txt');
2025-05-20 13:02:41 -07:00
expect(result.llmContent).toEqual([
`--- ${expectedPath} ---\n\nContent of file1\n\n`,
2025-05-20 13:02:41 -07:00
]);
expect(result.returnDisplay).toContain(
'Successfully read and concatenated content from **1 file(s)**',
);
});
it('should read multiple specified files', async () => {
createFile('file1.txt', 'Content1');
createFile('subdir/file2.js', 'Content2');
const params = { paths: ['file1.txt', 'subdir/file2.js'] };
const result = await tool.execute(params, new AbortController().signal);
const content = result.llmContent as string[];
const expectedPath1 = path.join(tempRootDir, 'file1.txt');
const expectedPath2 = path.join(tempRootDir, 'subdir/file2.js');
2025-05-20 13:02:41 -07:00
expect(
content.some((c) =>
c.includes(`--- ${expectedPath1} ---\n\nContent1\n\n`),
),
2025-05-20 13:02:41 -07:00
).toBe(true);
expect(
content.some((c) =>
c.includes(`--- ${expectedPath2} ---\n\nContent2\n\n`),
2025-05-20 13:02:41 -07:00
),
).toBe(true);
expect(result.returnDisplay).toContain(
'Successfully read and concatenated content from **2 file(s)**',
);
});
it('should handle glob patterns', async () => {
createFile('file.txt', 'Text file');
createFile('another.txt', 'Another text');
createFile('sub/data.json', '{}');
const params = { paths: ['*.txt'] };
const result = await tool.execute(params, new AbortController().signal);
const content = result.llmContent as string[];
const expectedPath1 = path.join(tempRootDir, 'file.txt');
const expectedPath2 = path.join(tempRootDir, 'another.txt');
2025-05-20 13:02:41 -07:00
expect(
content.some((c) =>
c.includes(`--- ${expectedPath1} ---\n\nText file\n\n`),
),
2025-05-20 13:02:41 -07:00
).toBe(true);
expect(
content.some((c) =>
c.includes(`--- ${expectedPath2} ---\n\nAnother text\n\n`),
2025-05-20 13:02:41 -07:00
),
).toBe(true);
expect(content.find((c) => c.includes('sub/data.json'))).toBeUndefined();
expect(result.returnDisplay).toContain(
'Successfully read and concatenated content from **2 file(s)**',
);
});
it('should respect exclude patterns', async () => {
createFile('src/main.ts', 'Main content');
createFile('src/main.test.ts', 'Test content');
const params = { paths: ['src/**/*.ts'], exclude: ['**/*.test.ts'] };
const result = await tool.execute(params, new AbortController().signal);
const content = result.llmContent as string[];
const expectedPath = path.join(tempRootDir, 'src/main.ts');
expect(content).toEqual([`--- ${expectedPath} ---\n\nMain content\n\n`]);
2025-05-20 13:02:41 -07:00
expect(
content.find((c) => c.includes('src/main.test.ts')),
).toBeUndefined();
expect(result.returnDisplay).toContain(
'Successfully read and concatenated content from **1 file(s)**',
);
});
it('should handle nonexistent specific files gracefully', async () => {
2025-05-20 13:02:41 -07:00
const params = { paths: ['nonexistent-file.txt'] };
const result = await tool.execute(params, new AbortController().signal);
expect(result.llmContent).toEqual([
'No files matching the criteria were found or all were skipped.',
]);
expect(result.returnDisplay).toContain(
'No files were read and concatenated based on the criteria.',
);
});
it('should use default excludes', async () => {
createFile('node_modules/some-lib/index.js', 'lib code');
createFile('src/app.js', 'app code');
const params = { paths: ['**/*.js'] };
const result = await tool.execute(params, new AbortController().signal);
const content = result.llmContent as string[];
const expectedPath = path.join(tempRootDir, 'src/app.js');
expect(content).toEqual([`--- ${expectedPath} ---\n\napp code\n\n`]);
2025-05-20 13:02:41 -07:00
expect(
content.find((c) => c.includes('node_modules/some-lib/index.js')),
).toBeUndefined();
expect(result.returnDisplay).toContain(
'Successfully read and concatenated content from **1 file(s)**',
);
});
it('should NOT use default excludes if useDefaultExcludes is false', async () => {
createFile('node_modules/some-lib/index.js', 'lib code');
createFile('src/app.js', 'app code');
const params = { paths: ['**/*.js'], useDefaultExcludes: false };
const result = await tool.execute(params, new AbortController().signal);
const content = result.llmContent as string[];
const expectedPath1 = path.join(
tempRootDir,
'node_modules/some-lib/index.js',
);
const expectedPath2 = path.join(tempRootDir, 'src/app.js');
2025-05-20 13:02:41 -07:00
expect(
content.some((c) =>
c.includes(`--- ${expectedPath1} ---\n\nlib code\n\n`),
2025-05-20 13:02:41 -07:00
),
).toBe(true);
expect(
content.some((c) =>
c.includes(`--- ${expectedPath2} ---\n\napp code\n\n`),
),
2025-05-20 13:02:41 -07:00
).toBe(true);
expect(result.returnDisplay).toContain(
'Successfully read and concatenated content from **2 file(s)**',
);
});
it('should include images as inlineData parts if explicitly requested by extension', async () => {
createBinaryFile(
'image.png',
Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a]),
);
const params = { paths: ['*.png'] }; // Explicitly requesting .png
const result = await tool.execute(params, new AbortController().signal);
expect(result.llmContent).toEqual([
{
inlineData: {
data: Buffer.from([
0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a,
]).toString('base64'),
mimeType: 'image/png',
},
},
]);
expect(result.returnDisplay).toContain(
'Successfully read and concatenated content from **1 file(s)**',
);
});
it('should include images as inlineData parts if explicitly requested by name', async () => {
createBinaryFile(
'myExactImage.png',
Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a]),
);
const params = { paths: ['myExactImage.png'] }; // Explicitly requesting by full name
const result = await tool.execute(params, new AbortController().signal);
expect(result.llmContent).toEqual([
{
inlineData: {
data: Buffer.from([
0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a,
]).toString('base64'),
mimeType: 'image/png',
},
},
]);
});
it('should skip PDF files if not explicitly requested by extension or name', async () => {
createBinaryFile('document.pdf', Buffer.from('%PDF-1.4...'));
createFile('notes.txt', 'text notes');
const params = { paths: ['*'] }; // Generic glob, not specific to .pdf
const result = await tool.execute(params, new AbortController().signal);
const content = result.llmContent as string[];
const expectedPath = path.join(tempRootDir, 'notes.txt');
2025-05-20 13:02:41 -07:00
expect(
content.some(
(c) =>
typeof c === 'string' &&
c.includes(`--- ${expectedPath} ---\n\ntext notes\n\n`),
2025-05-20 13:02:41 -07:00
),
).toBe(true);
2025-05-29 14:03:24 -07:00
expect(result.returnDisplay).toContain('**Skipped 1 item(s):**');
2025-05-20 13:02:41 -07:00
expect(result.returnDisplay).toContain(
'- `document.pdf` (Reason: asset file (image/pdf) was not explicitly requested by name or extension)',
);
});
it('should include PDF files as inlineData parts if explicitly requested by extension', async () => {
createBinaryFile('important.pdf', Buffer.from('%PDF-1.4...'));
const params = { paths: ['*.pdf'] }; // Explicitly requesting .pdf files
const result = await tool.execute(params, new AbortController().signal);
expect(result.llmContent).toEqual([
{
inlineData: {
data: Buffer.from('%PDF-1.4...').toString('base64'),
mimeType: 'application/pdf',
},
},
]);
});
it('should include PDF files as inlineData parts if explicitly requested by name', async () => {
createBinaryFile('report-final.pdf', Buffer.from('%PDF-1.4...'));
const params = { paths: ['report-final.pdf'] };
const result = await tool.execute(params, new AbortController().signal);
expect(result.llmContent).toEqual([
{
inlineData: {
data: Buffer.from('%PDF-1.4...').toString('base64'),
mimeType: 'application/pdf',
},
},
]);
});
it('should return error if path is ignored by a .geminiignore pattern', async () => {
createFile('foo.bar', '');
createFile('bar.ts', '');
createFile('foo.quux', '');
const params = { paths: ['foo.bar', 'bar.ts', 'foo.quux'] };
const result = await tool.execute(params, new AbortController().signal);
expect(result.returnDisplay).not.toContain('foo.bar');
expect(result.returnDisplay).not.toContain('foo.quux');
expect(result.returnDisplay).toContain('bar.ts');
});
it('should read files from multiple workspace directories', async () => {
const tempDir1 = fs.realpathSync(
fs.mkdtempSync(path.join(os.tmpdir(), 'multi-dir-1-')),
);
const tempDir2 = fs.realpathSync(
fs.mkdtempSync(path.join(os.tmpdir(), 'multi-dir-2-')),
);
const fileService = new FileDiscoveryService(tempDir1);
const mockConfig = {
getFileService: () => fileService,
getFileFilteringOptions: () => ({
respectGitIgnore: true,
respectGeminiIgnore: true,
}),
getWorkspaceContext: () => new WorkspaceContext(tempDir1, [tempDir2]),
getTargetDir: () => tempDir1,
} as Partial<Config> as Config;
tool = new ReadManyFilesTool(mockConfig);
fs.writeFileSync(path.join(tempDir1, 'file1.txt'), 'Content1');
fs.writeFileSync(path.join(tempDir2, 'file2.txt'), 'Content2');
const params = { paths: ['*.txt'] };
const result = await tool.execute(params, new AbortController().signal);
const content = result.llmContent as string[];
if (!Array.isArray(content)) {
throw new Error(`llmContent is not an array: ${content}`);
}
const expectedPath1 = path.join(tempDir1, 'file1.txt');
const expectedPath2 = path.join(tempDir2, 'file2.txt');
expect(
content.some((c) =>
c.includes(`--- ${expectedPath1} ---\n\nContent1\n\n`),
),
).toBe(true);
expect(
content.some((c) =>
c.includes(`--- ${expectedPath2} ---\n\nContent2\n\n`),
),
).toBe(true);
expect(result.returnDisplay).toContain(
'Successfully read and concatenated content from **2 file(s)**',
);
fs.rmSync(tempDir1, { recursive: true, force: true });
fs.rmSync(tempDir2, { recursive: true, force: true });
});
it('should add a warning for truncated files', async () => {
createFile('file1.txt', 'Content1');
// Create a file that will be "truncated" by making it long
const longContent = Array.from({ length: 2500 }, (_, i) => `L${i}`).join(
'\n',
);
createFile('large-file.txt', longContent);
const params = { paths: ['*.txt'] };
const result = await tool.execute(params, new AbortController().signal);
const content = result.llmContent as string[];
const normalFileContent = content.find((c) => c.includes('file1.txt'));
const truncatedFileContent = content.find((c) =>
c.includes('large-file.txt'),
);
expect(normalFileContent).not.toContain(
'[WARNING: This file was truncated.',
);
expect(truncatedFileContent).toContain(
"[WARNING: This file was truncated. To view the full content, use the 'read_file' tool on this specific file.]",
);
// Check that the actual content is still there but truncated
expect(truncatedFileContent).toContain('L200');
expect(truncatedFileContent).not.toContain('L2400');
});
2025-05-20 13:02:41 -07:00
});
describe('Batch Processing', () => {
const createMultipleFiles = (count: number, contentPrefix = 'Content') => {
const files: string[] = [];
for (let i = 0; i < count; i++) {
const fileName = `file${i}.txt`;
createFile(fileName, `${contentPrefix} ${i}`);
files.push(fileName);
}
return files;
};
const createFile = (filePath: string, content = '') => {
const fullPath = path.join(tempRootDir, filePath);
fs.mkdirSync(path.dirname(fullPath), { recursive: true });
fs.writeFileSync(fullPath, content);
};
it('should process files in parallel for performance', async () => {
// Mock detectFileType to add artificial delay to simulate I/O
const detectFileTypeSpy = vi.spyOn(
await import('../utils/fileUtils.js'),
'detectFileType',
);
// Create files
const fileCount = 4;
const files = createMultipleFiles(fileCount, 'Batch test');
// Mock with 100ms delay per file to simulate I/O operations
detectFileTypeSpy.mockImplementation(async (_filePath: string) => {
await new Promise((resolve) => setTimeout(resolve, 100));
return 'text';
});
const startTime = Date.now();
const params = { paths: files };
const result = await tool.execute(params, new AbortController().signal);
const endTime = Date.now();
const processingTime = endTime - startTime;
console.log(
`Processing time: ${processingTime}ms for ${fileCount} files`,
);
// Verify parallel processing performance improvement
// Parallel processing should complete in ~100ms (single file time)
// Sequential would take ~400ms (4 files × 100ms each)
expect(processingTime).toBeLessThan(200); // Should PASS with parallel implementation
// Verify all files were processed
const content = result.llmContent as string[];
expect(content).toHaveLength(fileCount);
// Cleanup mock
detectFileTypeSpy.mockRestore();
});
it('should handle batch processing errors gracefully', async () => {
// Create mix of valid and problematic files
createFile('valid1.txt', 'Valid content 1');
createFile('valid2.txt', 'Valid content 2');
createFile('valid3.txt', 'Valid content 3');
const params = {
paths: [
'valid1.txt',
'valid2.txt',
'nonexistent-file.txt', // This will fail
'valid3.txt',
],
};
const result = await tool.execute(params, new AbortController().signal);
const content = result.llmContent as string[];
// Should successfully process valid files despite one failure
expect(content.length).toBeGreaterThanOrEqual(3);
expect(result.returnDisplay).toContain('Successfully read');
// Verify valid files were processed
const expectedPath1 = path.join(tempRootDir, 'valid1.txt');
const expectedPath3 = path.join(tempRootDir, 'valid3.txt');
expect(content.some((c) => c.includes(expectedPath1))).toBe(true);
expect(content.some((c) => c.includes(expectedPath3))).toBe(true);
});
it('should execute file operations concurrently', async () => {
// Track execution order to verify concurrency
const executionOrder: string[] = [];
const detectFileTypeSpy = vi.spyOn(
await import('../utils/fileUtils.js'),
'detectFileType',
);
const files = ['file1.txt', 'file2.txt', 'file3.txt'];
files.forEach((file) => createFile(file, 'test content'));
// Mock to track concurrent vs sequential execution
detectFileTypeSpy.mockImplementation(async (filePath: string) => {
const fileName = filePath.split('/').pop() || '';
executionOrder.push(`start:${fileName}`);
// Add delay to make timing differences visible
await new Promise((resolve) => setTimeout(resolve, 50));
executionOrder.push(`end:${fileName}`);
return 'text';
});
await tool.execute({ paths: files }, new AbortController().signal);
console.log('Execution order:', executionOrder);
// Verify concurrent execution pattern
// In parallel execution: all "start:" events should come before all "end:" events
// In sequential execution: "start:file1", "end:file1", "start:file2", "end:file2", etc.
const startEvents = executionOrder.filter((e) =>
e.startsWith('start:'),
).length;
const firstEndIndex = executionOrder.findIndex((e) =>
e.startsWith('end:'),
);
const startsBeforeFirstEnd = executionOrder
.slice(0, firstEndIndex)
.filter((e) => e.startsWith('start:')).length;
// For parallel processing, ALL start events should happen before the first end event
expect(startsBeforeFirstEnd).toBe(startEvents); // Should PASS with parallel implementation
detectFileTypeSpy.mockRestore();
});
});
2025-05-20 13:02:41 -07:00
});