diff --git a/src/main/java/pl/poznan/put/rnapdbee/engine/shared/parser/SecondaryFileParser.java b/src/main/java/pl/poznan/put/rnapdbee/engine/shared/parser/SecondaryFileParser.java
index 2a1ce4b..ffeca6c 100644
--- a/src/main/java/pl/poznan/put/rnapdbee/engine/shared/parser/SecondaryFileParser.java
+++ b/src/main/java/pl/poznan/put/rnapdbee/engine/shared/parser/SecondaryFileParser.java
@@ -13,10 +13,15 @@
 import pl.poznan.put.structure.formats.DefaultDotBracket;
 import pl.poznan.put.structure.formats.DotBracket;
 
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
 @Component
 public class SecondaryFileParser {
 
     private static final Logger LOGGER = LoggerFactory.getLogger(SecondaryFileParser.class);
+    private static final Pattern LINE_BREAK_PATTERN = Pattern.compile("\\R");
+    private static final Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+");
     private final Converter converter;
 
     public DotBracket parseSecondaryFile(String content, InputType inputType, boolean removeIsolated) {
@@ -38,9 +43,10 @@ public DotBracket parseSecondaryFile(String content, InputType inputType, boolea
 
     private DotBracket convertBpSeqIntoDotBracket(String content, boolean removeIsolated) {
         try {
+            String normalizedContent = normalizeSecondaryFileContent(content);
             BpSeq bpSeq = removeIsolated
-                    ? BpSeq.fromString(content).withoutIsolatedPairs()
-                    : BpSeq.fromString(content);
+                    ? BpSeq.fromString(normalizedContent).withoutIsolatedPairs()
+                    : BpSeq.fromString(normalizedContent);
             Ct ct = Ct.fromBpSeq(bpSeq);
             return DefaultDotBracket.copyWithStrands(converter.convert(bpSeq), ct);
         } catch (IllegalArgumentException exception) {
@@ -52,9 +58,10 @@ private DotBracket convertBpSeqIntoDotBracket(String content, boolean removeIsol
 
     private DotBracket convertCtIntoDotBracket(String content, boolean removeIsolated) {
         try {
+            String normalizedContent = normalizeSecondaryFileContent(content);
             Ct ct = removeIsolated
-                    ? Ct.fromString(content).withoutIsolatedPairs()
-                    : Ct.fromString(content);
+                    ? Ct.fromString(normalizedContent).withoutIsolatedPairs()
+                    : Ct.fromString(normalizedContent);
             BpSeq bpSeq = BpSeq.fromCt(ct);
             return DefaultDotBracket.copyWithStrands(converter.convert(bpSeq), ct);
         } catch (IllegalArgumentException exception) {
@@ -79,6 +86,23 @@ private DotBracket readDotBracketContent(String content, boolean removeIsolated)
         }
     }
 
+    /**
+     * Normalizes BPSEQ/CT content before it is handed to the format parsers.
+     *
+     * <p>Each line is trimmed, blank lines are dropped, every run of whitespace inside a line is
+     * collapsed into a single space and the remaining lines are joined with {@code \n}.
+     *
+     * @param content raw file content as received from the caller
+     * @return the normalized content; an empty string when there are no non-blank lines
+     */
+    private String normalizeSecondaryFileContent(String content) {
+        return LINE_BREAK_PATTERN.splitAsStream(content)
+                .map(String::trim)
+                .filter(line -> !line.isEmpty())
+                .map(line -> WHITESPACE_PATTERN.matcher(line).replaceAll(" "))
+                .collect(Collectors.joining("\n"));
+    }
+
     @Autowired
     public SecondaryFileParser(Converter converter) {
         this.converter = converter;
diff --git a/src/test/java/pl/poznan/put/rnapdbee/engine/shared/parser/SecondaryFileParserTest.java b/src/test/java/pl/poznan/put/rnapdbee/engine/shared/parser/SecondaryFileParserTest.java
new file mode 100644
index 0000000..045afff
--- /dev/null
+++ b/src/test/java/pl/poznan/put/rnapdbee/engine/shared/parser/SecondaryFileParserTest.java
@@ -0,0 +1,58 @@
+package pl.poznan.put.rnapdbee.engine.shared.parser;
+
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.mockito.InjectMocks;
+import org.mockito.Mock;
+import org.mockito.junit.jupiter.MockitoExtension;
+import pl.poznan.put.rnapdbee.engine.shared.domain.InputType;
+import pl.poznan.put.structure.formats.BpSeq;
+import pl.poznan.put.structure.formats.Converter;
+import pl.poznan.put.structure.formats.DotBracket;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+
+/** Checks that {@link SecondaryFileParser} accepts BPSEQ and CT content with irregular whitespace. */
+@ExtendWith(MockitoExtension.class)
+class SecondaryFileParserTest {
+
+    @Mock
+    private Converter converter;
+
+    @InjectMocks
+    private SecondaryFileParser secondaryFileParser;
+
+    @Test
+    void shouldParseBpSeqWithRedundantSpacesAndEmptyLines() {
+        // Input mixes repeated spaces, a tab, leading/trailing blanks and an empty line.
+        String content = "1  A   4\n2 U 3\n\n  3\tG  2  \n4 C 1\n";
+        DotBracket conversionResult = mock(DotBracket.class);
+        when(conversionResult.sequence()).thenReturn("AAAA");
+        when(conversionResult.structure()).thenReturn("()()");
+        when(converter.convert(any(BpSeq.class))).thenReturn(conversionResult);
+
+        DotBracket result = secondaryFileParser.parseSecondaryFile(content, InputType.BPSEQ, false);
+
+        assertThat(result).isNotNull();
+        verify(converter).convert(any(BpSeq.class));
+    }
+
+    @Test
+    void shouldParseCtWithRedundantSpacesAndEmptyLines() {
+        // Input mixes repeated spaces, a tab, leading/trailing blanks and an empty line.
+        String content = "  4   some header\n1 A 0 2 4 1\n   2  U 1  3 3 2\n\n3 G 2 4 2 3\n4\tC 3 0 1 4  \n";
+        DotBracket conversionResult = mock(DotBracket.class);
+        when(conversionResult.sequence()).thenReturn("AAAA");
+        when(conversionResult.structure()).thenReturn("()()");
+        when(converter.convert(any(BpSeq.class))).thenReturn(conversionResult);
+
+        DotBracket result = secondaryFileParser.parseSecondaryFile(content, InputType.CT, false);
+
+        assertThat(result).isNotNull();
+        verify(converter).convert(any(BpSeq.class));
+    }
+}