Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 68 additions & 1 deletion apps/sim/lib/file-parsers/index.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,31 @@ const mockDocxParseFile = vi.fn().mockResolvedValue({
},
})

// Mock for the TXT parser's parseFile. By default it resolves to a fixed
// fixture result; the metadata numbers are fixture values, not derived from
// the content string.
const mockTxtParseFile = vi.fn().mockResolvedValue({
content: 'Parsed TXT content',
metadata: {
characterCount: 100,
tokenCount: 10,
},
})

// Mock for the MD parser's parseFile. By default it resolves to a fixed
// fixture result; the metadata numbers are fixture values, not derived from
// the content string.
const mockMdParseFile = vi.fn().mockResolvedValue({
content: 'Parsed MD content',
metadata: {
characterCount: 100,
tokenCount: 10,
},
})

// Create mock module implementation
const createMockModule = () => {
// Create mock parsers
const mockParsers: Record<string, FileParser> = {
pdf: { parseFile: mockPdfParseFile },
csv: { parseFile: mockCsvParseFile },
docx: { parseFile: mockDocxParseFile },
txt: { parseFile: mockTxtParseFile },
md: { parseFile: mockMdParseFile },
}

// Create the mock module implementation
Expand Down Expand Up @@ -122,6 +140,18 @@ describe('File Parsers', () => {
})),
}))

// Mock './txt-parser' so that constructing TxtParser returns an object whose
// parseFile is the shared mockTxtParseFile.
vi.doMock('./txt-parser', () => ({
TxtParser: vi.fn().mockImplementation(() => ({
parseFile: mockTxtParseFile,
})),
}))

// Mock './md-parser' so that constructing MdParser returns an object whose
// parseFile is the shared mockMdParseFile.
vi.doMock('./md-parser', () => ({
MdParser: vi.fn().mockImplementation(() => ({
parseFile: mockMdParseFile,
})),
}))

// Silence console output during tests
global.console = {
...console,
Expand Down Expand Up @@ -211,6 +241,40 @@ describe('File Parsers', () => {
expect(result).toEqual(expectedResult)
})

it('should parse TXT files successfully', async () => {
  // Arrange: the file "exists" and the TXT parser mock yields a known result once.
  const txtParseOutput = {
    content: 'Parsed TXT content',
    metadata: { characterCount: 100, tokenCount: 10 },
  }
  mockExistsSync.mockReturnValue(true)
  mockTxtParseFile.mockResolvedValueOnce(txtParseOutput)

  // Act: load the module under test and parse a .txt path.
  const fileParsers = await import('./index')
  const parsed = await fileParsers.parseFile('/test/files/document.txt')

  // Assert: parseFile hands back exactly what the TXT parser produced.
  expect(parsed).toEqual(txtParseOutput)
})

it('should parse MD files successfully', async () => {
  // Known result the MD parser mock will yield for this single call.
  const expectedResult = {
    content: 'Parsed MD content',
    metadata: {
      characterCount: 100,
      tokenCount: 10,
    },
  }

  mockMdParseFile.mockResolvedValueOnce(expectedResult)
  // Make sure the file "exists" for this test
  mockExistsSync.mockReturnValue(true)

  const { parseFile } = await import('./index')
  const result = await parseFile('/test/files/document.md')

  // Without this assertion the test would pass regardless of what parseFile returns.
  expect(result).toEqual(expectedResult)
})
Comment on lines +262 to +276
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

logic: Missing expect assertion for test result. Add expect(result).toEqual(expectedResult) to verify the parsed output.

Suggested change
it('should parse MD files successfully', async () => {
const expectedResult = {
content: 'Parsed MD content',
metadata: {
characterCount: 100,
tokenCount: 10,
},
}
mockMdParseFile.mockResolvedValueOnce(expectedResult)
mockExistsSync.mockReturnValue(true)
const { parseFile } = await import('./index')
const result = await parseFile('/test/files/document.md')
})
it('should parse MD files successfully', async () => {
const expectedResult = {
content: 'Parsed MD content',
metadata: {
characterCount: 100,
tokenCount: 10,
},
}
mockMdParseFile.mockResolvedValueOnce(expectedResult)
mockExistsSync.mockReturnValue(true)
const { parseFile } = await import('./index')
const result = await parseFile('/test/files/document.md')
expect(result).toEqual(expectedResult)
})


it('should throw error for unsupported file types', async () => {
// Make sure the file "exists" for this test
mockExistsSync.mockReturnValue(true)
Expand Down Expand Up @@ -240,13 +304,14 @@ describe('File Parsers', () => {
expect(isSupportedFileType('pdf')).toBe(true)
expect(isSupportedFileType('csv')).toBe(true)
expect(isSupportedFileType('docx')).toBe(true)
expect(isSupportedFileType('txt')).toBe(true)
expect(isSupportedFileType('md')).toBe(true)
})

it('should return false for unsupported file types', async () => {
  const { isSupportedFileType } = await import('./index')

  // 'txt' must not be listed here: a TXT parser is registered, and the
  // supported-types test asserts isSupportedFileType('txt') is true.
  expect(isSupportedFileType('png')).toBe(false)
  expect(isSupportedFileType('unknown')).toBe(false)
})

Expand All @@ -255,6 +320,8 @@ describe('File Parsers', () => {

expect(isSupportedFileType('PDF')).toBe(true)
expect(isSupportedFileType('CSV')).toBe(true)
expect(isSupportedFileType('TXT')).toBe(true)
expect(isSupportedFileType('MD')).toBe(true)
})

it('should handle errors gracefully', async () => {
Expand Down
14 changes: 14 additions & 0 deletions apps/sim/lib/file-parsers/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,20 @@ function getParserInstances(): Record<string, FileParser> {
} catch (error) {
logger.error('Failed to load DOCX parser:', error)
}

// Load the TXT parser on its own: if the require or construction throws, the
// error is logged and no 'txt' entry is registered.
try {
const { TxtParser } = require('./txt-parser')
parserInstances.txt = new TxtParser()
} catch (error) {
logger.error('Failed to load TXT parser:', error)
}

// Load the MD parser on its own: if the require or construction throws, the
// error is logged and no 'md' entry is registered.
try {
const { MdParser } = require('./md-parser')
parserInstances.md = new MdParser()
} catch (error) {
logger.error('Failed to load MD parser:', error)
}
} catch (error) {
logger.error('Error loading file parsers:', error)
}
Expand Down
45 changes: 45 additions & 0 deletions apps/sim/lib/file-parsers/md-parser.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import { readFile } from 'fs/promises'
import { createLogger } from '@/lib/logs/console-logger'
import type { FileParseResult, FileParser } from './types'

const logger = createLogger('MdParser')

/**
 * Parser for Markdown (`.md`) files.
 *
 * The file is decoded as UTF-8 text and returned as-is; no Markdown-specific
 * processing is performed.
 */
export class MdParser implements FileParser {
  /**
   * Reads the file at `filePath` and delegates to `parseBuffer`.
   *
   * @param filePath - Path of the file to read.
   * @returns The decoded content and its metadata.
   * @throws Error prefixed with "Failed to parse MD file:" when the path is
   *   empty or the file cannot be read. A rejection from `parseBuffer`
   *   propagates with its own message, because its promise is returned
   *   without being awaited inside this `try`.
   */
  async parseFile(filePath: string): Promise<FileParseResult> {
    try {
      // Validate input
      if (!filePath) {
        throw new Error('No file path provided')
      }

      // Read the file
      const buffer = await readFile(filePath)

      // Use parseBuffer for consistent implementation
      return this.parseBuffer(buffer)
    } catch (error) {
      logger.error('MD file error:', error)
      // Narrow instead of asserting so non-Error throwables still produce a readable message.
      const reason = error instanceof Error ? error.message : String(error)
      throw new Error(`Failed to parse MD file: ${reason}`)
    }
  }

  /**
   * Decodes `buffer` as UTF-8 and returns it with basic metadata.
   *
   * @param buffer - Raw file bytes.
   * @returns `content` (the decoded string), `characterCount` (the string's
   *   `length`, i.e. UTF-16 code units) and `tokenCount` (a whole-number
   *   estimate, see below).
   * @throws Error prefixed with "Failed to parse MD buffer:" if decoding fails.
   */
  async parseBuffer(buffer: Buffer): Promise<FileParseResult> {
    try {
      logger.info('Parsing buffer, size:', buffer.length)

      // Extract content
      const content = buffer.toString('utf-8')

      return {
        content,
        metadata: {
          characterCount: content.length,
          // Rough heuristic (~4 characters per token), not a real tokenizer.
          // Rounded up so the estimate is always an integer.
          tokenCount: Math.ceil(content.length / 4),
        },
      }
    } catch (error) {
      logger.error('MD buffer parsing error:', error)
      const reason = error instanceof Error ? error.message : String(error)
      throw new Error(`Failed to parse MD buffer: ${reason}`)
    }
  }
}
45 changes: 45 additions & 0 deletions apps/sim/lib/file-parsers/txt-parser.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import { readFile } from 'fs/promises'
import { createLogger } from '@/lib/logs/console-logger'
import type { FileParseResult, FileParser } from './types'

const logger = createLogger('TxtParser')

/**
 * Parser for plain-text (`.txt`) files.
 *
 * The file is decoded as UTF-8 text and returned as-is.
 */
export class TxtParser implements FileParser {
  /**
   * Reads the file at `filePath` and delegates to `parseBuffer`.
   *
   * @param filePath - Path of the file to read.
   * @returns The decoded content and its metadata.
   * @throws Error prefixed with "Failed to parse TXT file:" when the path is
   *   empty or the file cannot be read. A rejection from `parseBuffer`
   *   propagates with its own message, because its promise is returned
   *   without being awaited inside this `try`.
   */
  async parseFile(filePath: string): Promise<FileParseResult> {
    try {
      // Validate input
      if (!filePath) {
        throw new Error('No file path provided')
      }

      // Read the file
      const buffer = await readFile(filePath)

      // Use parseBuffer for consistent implementation
      return this.parseBuffer(buffer)
    } catch (error) {
      logger.error('TXT file error:', error)
      // Narrow instead of asserting so non-Error throwables still produce a readable message.
      const reason = error instanceof Error ? error.message : String(error)
      throw new Error(`Failed to parse TXT file: ${reason}`)
    }
  }

  /**
   * Decodes `buffer` as UTF-8 and returns it with basic metadata.
   *
   * @param buffer - Raw file bytes.
   * @returns `content` (the decoded string), `characterCount` (the string's
   *   `length`, i.e. UTF-16 code units) and `tokenCount` (a whole-number
   *   estimate, see below).
   * @throws Error prefixed with "Failed to parse TXT buffer:" if decoding fails.
   */
  async parseBuffer(buffer: Buffer): Promise<FileParseResult> {
    try {
      logger.info('Parsing buffer, size:', buffer.length)

      // Extract content
      const content = buffer.toString('utf-8')

      return {
        content,
        metadata: {
          characterCount: content.length,
          // Rough heuristic (~4 characters per token), not a real tokenizer.
          // Rounded up so the estimate is always an integer.
          tokenCount: Math.ceil(content.length / 4),
        },
      }
    } catch (error) {
      logger.error('TXT buffer parsing error:', error)
      const reason = error instanceof Error ? error.message : String(error)
      throw new Error(`Failed to parse TXT buffer: ${reason}`)
    }
  }
}