Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,15 @@
import pl.poznan.put.structure.formats.DefaultDotBracket;
import pl.poznan.put.structure.formats.DotBracket;

import java.util.Arrays;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

@Component
public class SecondaryFileParser {

private static final Logger LOGGER = LoggerFactory.getLogger(SecondaryFileParser.class);
private static final Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+");
private final Converter converter;

public DotBracket parseSecondaryFile(String content, InputType inputType, boolean removeIsolated) {
Expand All @@ -38,9 +43,10 @@ public DotBracket parseSecondaryFile(String content, InputType inputType, boolea

private DotBracket convertBpSeqIntoDotBracket(String content, boolean removeIsolated) {
try {
String normalizedContent = normalizeSecondaryFileContent(content);
BpSeq bpSeq = removeIsolated
? BpSeq.fromString(content).withoutIsolatedPairs()
: BpSeq.fromString(content);
? BpSeq.fromString(normalizedContent).withoutIsolatedPairs()
: BpSeq.fromString(normalizedContent);
Ct ct = Ct.fromBpSeq(bpSeq);
return DefaultDotBracket.copyWithStrands(converter.convert(bpSeq), ct);
} catch (IllegalArgumentException exception) {
Expand All @@ -52,9 +58,10 @@ private DotBracket convertBpSeqIntoDotBracket(String content, boolean removeIsol

private DotBracket convertCtIntoDotBracket(String content, boolean removeIsolated) {
try {
String normalizedContent = normalizeSecondaryFileContent(content);
Ct ct = removeIsolated
? Ct.fromString(content).withoutIsolatedPairs()
: Ct.fromString(content);
? Ct.fromString(normalizedContent).withoutIsolatedPairs()
: Ct.fromString(normalizedContent);
BpSeq bpSeq = BpSeq.fromCt(ct);
return DefaultDotBracket.copyWithStrands(converter.convert(bpSeq), ct);
} catch (IllegalArgumentException exception) {
Expand All @@ -79,6 +86,14 @@ private DotBracket readDotBracketContent(String content, boolean removeIsolated)
}
}

/**
 * Normalizes raw secondary-structure file content before it is parsed.
 *
 * <p>The content is split on any line break, each line is trimmed, lines that are
 * empty after trimming are dropped, every run of whitespace inside a remaining line
 * is collapsed to a single space, and the surviving lines are joined with {@code \n}
 * (no trailing newline).
 *
 * @param content raw file content; must not be {@code null}
 * @return the normalized content, or an empty string when no non-blank line exists
 */
private String normalizeSecondaryFileContent(String content) {
    StringBuilder normalized = new StringBuilder();
    for (String rawLine : content.split("\\R")) {
        String trimmedLine = rawLine.trim();
        if (trimmedLine.isEmpty()) {
            // Blank lines carry no records; skip them entirely.
            continue;
        }
        if (normalized.length() > 0) {
            // Separator goes between lines only, never before the first one.
            normalized.append("\n");
        }
        normalized.append(WHITESPACE_PATTERN.matcher(trimmedLine).replaceAll(" "));
    }
    return normalized.toString();
}

@Autowired
public SecondaryFileParser(Converter converter) {
this.converter = converter;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
package pl.poznan.put.rnapdbee.engine.shared.parser;

import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.InjectMocks;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;
import pl.poznan.put.rnapdbee.engine.shared.domain.InputType;
import pl.poznan.put.structure.formats.BpSeq;
import pl.poznan.put.structure.formats.Converter;
import pl.poznan.put.structure.formats.DefaultDotBracket;
import pl.poznan.put.structure.formats.DotBracket;

import static org.assertj.core.api.Assertions.assertThat;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;

/**
 * Tests that {@link SecondaryFileParser} accepts BPSEQ and CT input containing
 * an empty line and a line with leading whitespace.
 *
 * <p>The {@link Converter} is mocked, so these tests only check that parsing reaches
 * the conversion step and yields a non-null result; they do not validate the
 * produced dot-bracket content.
 */
@ExtendWith(MockitoExtension.class)
class SecondaryFileParserTest {

    @Mock
    private Converter converter;

    @InjectMocks
    private SecondaryFileParser secondaryFileParser;

    @Test
    void shouldParseBpSeqWithRedundantSpacesAndEmptyLines() {
        // Four BPSEQ records with an empty line and a leading space before the third one.
        String bpSeqContent = "1 A 4\n2 U 3\n\n 3 G 2\n4 C 1\n";
        stubConverterResult();

        DotBracket parsed =
                secondaryFileParser.parseSecondaryFile(bpSeqContent, InputType.BPSEQ, false);

        assertThat(parsed).isNotNull();
        verify(converter).convert(any(BpSeq.class));
    }

    @Test
    void shouldParseCtWithRedundantSpacesAndEmptyLines() {
        // Header plus four CT records, with leading spaces and an empty line in between.
        String ctContent =
                " 4 some header\n1 A 0 2 4 1\n 2 U 1 3 3 2\n\n3 G 2 4 2 3\n4 C 3 0 1 4\n";
        stubConverterResult();

        DotBracket parsed =
                secondaryFileParser.parseSecondaryFile(ctContent, InputType.CT, false);

        assertThat(parsed).isNotNull();
        verify(converter).convert(any(BpSeq.class));
    }

    /**
     * Makes the mocked converter return a mocked {@link DotBracket} with a fixed
     * four-character sequence and structure for any {@link BpSeq} argument.
     */
    private void stubConverterResult() {
        DotBracket stubbedDotBracket = mock(DotBracket.class);
        when(stubbedDotBracket.sequence()).thenReturn("AAAA");
        when(stubbedDotBracket.structure()).thenReturn("()()");
        when(converter.convert(any(BpSeq.class))).thenReturn(stubbedDotBracket);
    }
}