From d8ea7e0257ca3b8b6975e30289911b78a18d9899 Mon Sep 17 00:00:00 2001 From: Denis Veselov Date: Thu, 18 Mar 2021 21:04:16 +0300 Subject: [PATCH] Improve detect language processor: if line empty - don't break full line --- src/textdatasetcleaner/processors/detect_language.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/textdatasetcleaner/processors/detect_language.py b/src/textdatasetcleaner/processors/detect_language.py index be9d342..7286384 100644 --- a/src/textdatasetcleaner/processors/detect_language.py +++ b/src/textdatasetcleaner/processors/detect_language.py @@ -48,6 +48,10 @@ def __init__( def process_line(self, line: str) -> Optional[str]: line_cpy = get_line_piece(line, self.delimiter, self.delimited_position) + # if piece empty - dont break full line + if not line_cpy: + return line + # TODO: `line_cpy = line_cpy.lower()` ? result = self.ft.predict(line_cpy, k=1)