diff --git a/dspace-api/src/main/java/org/dspace/health/MetadataCheck.java b/dspace-api/src/main/java/org/dspace/health/MetadataCheck.java new file mode 100644 index 000000000000..7f0539a99915 --- /dev/null +++ b/dspace-api/src/main/java/org/dspace/health/MetadataCheck.java @@ -0,0 +1,497 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.health; + +import java.io.IOException; +import java.io.InputStream; +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.TreeMap; +import java.util.stream.StreamSupport; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ArrayNode; +import org.apache.commons.collections.ListUtils; +import org.dspace.content.factory.ContentServiceFactory; +import org.dspace.core.Context; +import org.dspace.curate.Curator; +import org.dspace.services.ConfigurationService; +import org.dspace.utils.DSpace; +import org.json.JSONArray; +import org.json.JSONObject; + +/** + * @author Milan Kuchtiak + */ +public class MetadataCheck extends Check { + + private static final String QA_METADATA_ERROR_PATTERNS_JSON = "metadata-check-patterns.json"; + private static final String VALIDATION_TYPE_OTHER = "validation.other"; + private static final int COUNT_INDENTATION = 30; + + // default values for configuration properties, which can be overridden in configuration + + // the maximum number of errors to be shown in the report + private static final int MAXIMUM_ERRORS_TO_SHOW = 100; + // the maximum number of warnings to be shown in the report + private static final int MAXIMUM_WARNINGS_TO_SHOW = 50; + // This number is only relevant when the number of 
errors exceeds the maximum number of errors to be shown. + // Represents the dispersion of the error messages. + // The frequency of the new (upcoming) message in the report is compared with the frequency of the + // most frequent message in the report, and the replacement is made when the frequency of the new message + // is significantly lower than the frequency of the most frequent message + // (when the difference in occurrence is higher than the dispersion quota). + private static final int ERROR_DISPERSION_QUOTA = 5; + // the same as ERROR_DISPERSION_QUOTA but for warnings + private static final int WARNING_DISPERSION_QUOTA = 5; + + private static Map> errorPatterns; + private static Map> warningPatterns; + + static { + try { + loadPatterns(); + } catch (IOException e) { + throw new RuntimeException("Cannot load error patterns", e); + } + } + + @Override + public String run(ReportInfo ri) { + + ConfigurationService configurationService = new DSpace().getConfigurationService(); + + int maxErrorsToShow = configurationService.getIntProperty("healthcheck.metadata.max-errors-to-show", + MAXIMUM_ERRORS_TO_SHOW); + + int maxWarningsToShow = configurationService.getIntProperty("healthcheck.metadata.max-warnings-to-show", + MAXIMUM_WARNINGS_TO_SHOW); + + int errorDispersionQuota = configurationService.getIntProperty("healthcheck.metadata.error-dispersion-quota", + ERROR_DISPERSION_QUOTA); + + int warningDispersionQuota = + configurationService.getIntProperty("healthcheck.metadata.warning-dispersion-quota", + WARNING_DISPERSION_QUOTA); + + StringBuilder sb = new StringBuilder(); + JSONObject root = new JSONObject(); + + Curator curator = new Curator(); + curator.addTask("metadataqa"); + + MetadataReporter reporter = new MetadataReporter( + maxErrorsToShow, + maxWarningsToShow, + errorDispersionQuota, + warningDispersionQuota); + + curator.setReporter(reporter); + try (Context context = new Context()) { + curator.curate(context, 
ContentServiceFactory.getInstance().getSiteService().findSite(context).getHandle()); + context.complete(); + } catch (IOException | SQLException e) { + error(e, "Error during curation"); + } + + Map errorCounts = reporter.getErrorCount(); + int overallErrorCount = errorCounts.values().stream().mapToInt(Integer::intValue).sum(); + + Map warningCounts = reporter.getWarningCount(); + int overallWarningCount = warningCounts.values().stream().mapToInt(Integer::intValue).sum(); + + Map> errorMessages = reporter.getErrorMessages(); + Map> warningMessages = reporter.getWarningMessages(); + + // error statistics + if (overallErrorCount > 0) { + sb.append("\nError statistics:\n\n"); + errorCounts.forEach((key, val) -> { + String errorCode = formatErrorCode(key); + sb.append(errorCode).append(" ".repeat(COUNT_INDENTATION - errorCode.length())) + .append(String.format("%7d", val)).append("\n"); + }); + sb.append("-".repeat(COUNT_INDENTATION + 7)).append("\n"); + sb.append("Error count total: ") + .append(" ".repeat(COUNT_INDENTATION - "Error count total: ".length())) + .append(String.format("%7d", overallErrorCount)).append("\n"); + } + + // warning statistics + if (overallWarningCount > 0) { + sb.append("\nWarning statistics:\n\n"); + warningCounts.forEach((key, val) -> { + String errorCode = formatErrorCode(key); + sb.append(errorCode).append(" ".repeat(COUNT_INDENTATION - errorCode.length())) + .append(String.format("%7d", val)).append("\n"); + }); + sb.append("-".repeat(COUNT_INDENTATION + 7)).append("\n"); + sb.append("Warning count total: ") + .append(" ".repeat(COUNT_INDENTATION - "Warning count total: ".length())) + .append(String.format("%7d", overallWarningCount)).append("\n"); + } + + // list of errors + if (overallErrorCount > 0) { + sb.append("\nErrors:\n"); + errorMessages.forEach((key, messages) -> + messages.forEach(message -> sb.append(message).append("\n")) + ); + if (overallErrorCount > maxErrorsToShow) { + sb.append("and more...\n"); + } + } + + // list of 
warnings + if (overallWarningCount > 0) { + sb.append("\nWarnings:\n"); + warningMessages.forEach((key, messages) -> + messages.forEach(message -> sb.append(message).append("\n")) + ); + if (overallWarningCount > maxWarningsToShow) { + sb.append("and more...\n"); + } + } + + // populate JSON report + root.put("errorCount", overallErrorCount); + root.put("warningCount", overallWarningCount); + + JSONArray errors = new JSONArray(); + errorCounts.forEach((key, val) -> { + JSONObject error = new JSONObject() + .put("type", key) + .put("count", val); + errors.put(error); + }); + root.put("errors", errors); + + JSONArray warnings = new JSONArray(); + warningCounts.forEach((key, val) -> { + JSONObject warning = new JSONObject() + .put("type", key) + .put("count", val); + warnings.put(warning); + }); + root.put("warnings", warnings); + + this.setReportJson(root); + return sb.toString(); + } + + private static void loadPatterns() throws IOException { + try (InputStream qaMetadataErrors = Thread.currentThread() + .getContextClassLoader().getResourceAsStream(QA_METADATA_ERROR_PATTERNS_JSON);) { + if (qaMetadataErrors == null) { + throw new IOException("Resource '" + QA_METADATA_ERROR_PATTERNS_JSON + + "' not found in classpath"); + } + JsonNode root = new ObjectMapper().readTree(qaMetadataErrors); + + // Load error types and their associated error patterns + errorPatterns = getPatterns(root.withObject("errors")); + // Load warning types and their associated warning patterns + warningPatterns = getPatterns(root.withObject("warnings")); + } + } + + private static Map> getPatterns(JsonNode parentNode) { + Map> validationPatterns = new HashMap<>(); + parentNode.fieldNames().forEachRemaining(validationType -> { + List validationMessages = new ArrayList<>(); + ArrayNode patterns = parentNode.withArray(validationType); + StreamSupport.stream(patterns.spliterator(), false).forEach(message -> { + validationMessages.add(message.asText()); + }); + validationPatterns.put(validationType, 
validationMessages); + }); + + return validationPatterns; + } + + private static String formatErrorCode(String errorCode) { + if (errorCode.startsWith("validation.")) { + String validationCode = errorCode.substring("validation.".length()); + return validationCode.replaceAll("\\.", " ") + " issues: "; + } else { + return errorCode + " issues: "; + } + } + + private static class MetadataReporter implements Appendable { + + private final int maxErrorsToShow; + private final int maxWarningsToShow; + private final int errorDispersionQuota; + private final int warningDispersionQuota; + + private final Map errorCount = new TreeMap<>(); + private final Map warningCount = new TreeMap<>(); + + // represent stored messages for errors and warnings + private final StoredMessagesInfo errorMessages = new StoredMessagesInfo(); + private final StoredMessagesInfo warningMessages = new StoredMessagesInfo(); + + Map> getErrorMessages() { + return errorMessages.getStoredMessages(); + } + + Map getErrorCount() { + return errorCount; + } + + Map> getWarningMessages() { + return warningMessages.getStoredMessages(); + } + + Map getWarningCount() { + return warningCount; + } + + MetadataReporter(int maxErrorsToShow, + int maxWarningsToShow, + int errorDispersionQuota, + int warningDispersionQuota) { + this.maxErrorsToShow = maxErrorsToShow; + this.maxWarningsToShow = maxWarningsToShow; + this.errorDispersionQuota = errorDispersionQuota; + this.warningDispersionQuota = warningDispersionQuota; + } + + @Override + public Appendable append(CharSequence cs) throws IOException { + String line = cs.toString(); + if (line.contains("ERROR! ")) { + populateData( + "ERROR! 
", + line, + errorPatterns, + errorCount, + errorMessages, + maxErrorsToShow, + errorDispersionQuota + ); + } else if (line.contains("Warning: ")) { + populateData( + "Warning: ", + line, + warningPatterns, + warningCount, + warningMessages, + maxWarningsToShow, + warningDispersionQuota + ); + } + return this; + } + + @Override + public Appendable append(CharSequence cs, int i, int i1) throws IOException { + return this.append(cs.subSequence(i, i1)); + } + + @Override + public Appendable append(char c) throws IOException { + return this.append(String.valueOf(c)); + } + + private void populateData(String prefix, + String line, + Map> patterns, + Map counts, + StoredMessagesInfo storedMessagesInfo, + int limit, + int dispersionQuota + ) { + int startIndex = line.indexOf(prefix) + prefix.length(); + String fullMessage = line.substring(startIndex); + int endIndex = fullMessage.lastIndexOf("[["); + String messageKey; + if (endIndex > 0) { + messageKey = fullMessage.substring(0, endIndex - 1); + } else { + messageKey = fullMessage; + } + Message message = new Message(messageKey, fullMessage); + boolean found = false; + for (Map.Entry> entry : patterns.entrySet()) { + String type = entry.getKey(); + List typePatterns = entry.getValue(); + for (String pattern : typePatterns) { + boolean startsWithCaret = pattern.startsWith("^"); + boolean endsWithDollar = pattern.endsWith("$"); + if ((startsWithCaret && messageKey.startsWith(pattern.substring(1))) || + (endsWithDollar && messageKey.endsWith(pattern.substring(0, pattern.length() - 1))) || + (!startsWithCaret && !endsWithDollar && messageKey.contains(pattern)) + ) { + addMessage(type, message, counts, storedMessagesInfo, limit, dispersionQuota); + found = true; + break; + } + } + if (found) { + break; // If a pattern is found, no need to check other patterns for this message + } + } + if (!found) { + // If no pattern matched, categorize under "validation.other" + addMessage(VALIDATION_TYPE_OTHER, message, counts, 
storedMessagesInfo, limit, dispersionQuota); + } + } + + private void addMessage(String validationType, + Message message, + Map counts, + StoredMessagesInfo storedMessagesInfo, + int limit, + int dispersionQuota) { + // increase the count for this validation type + counts.merge(validationType, 1, Integer::sum); + int mCount = storedMessagesInfo.getCount(); + Map> messages = storedMessagesInfo.getStoredMessages(); + if (mCount < limit) { + // add error|warning to messages and increase the overall messages count + messages.merge(message.getMessageKey(), List.of(message.getFullMessage()), ListUtils::union); + storedMessagesInfo.count++; + } else { + // replace one of the stored messages with new message when possible + // but don't change the overall messages count + replaceMessage(message, storedMessagesInfo, dispersionQuota); + } + } + + /** + * Try to replace one of the stored messages, with the highest frequency, with the new message. + * The replacement is made when the new message is entirely new + * or the frequency of the new message is significantly lower than the messages with the highest frequency. 
+ * + * @param message the new message that should be added to stored messages + * @param storedMessagesInfo the messages that are already stored for the report + * @param dispersionQuota quota saying how many of the messages with the highest frequency it is acceptable to keep + * compared to the frequency of the new message + */ + private void replaceMessage(Message message, StoredMessagesInfo storedMessagesInfo, int dispersionQuota) { + int highestMessageFrequency = storedMessagesInfo.getHighestFrequency(); + if (highestMessageFrequency <= 1) { + // no replacement, as there are no messages with the frequency higher than 1, so the replacement + // of any message would not increase the diversity of messages in stored messages + return; + } + String messageKey = message.getMessageKey(); + Map> storedMessages = storedMessagesInfo.getStoredMessages(); + List storedMessagesForMessageKey = storedMessages.get(messageKey); + + if (storedMessagesForMessageKey != null && + (storedMessagesForMessageKey.size() + dispersionQuota >= highestMessageFrequency)) { + // no replacement, as the frequency of the new message is not significantly lower + // than the frequency of the message with the highest frequency + return; + } + + // recalculate the highest frequency of messages for any message key in stored messages, + // because it can be changed after each replacement + String messageKeyWithHighestFrequency = Objects.requireNonNull(getMessageWithHighestCount(storedMessages)); + highestMessageFrequency = storedMessages.get(messageKeyWithHighestFrequency).size(); + storedMessagesInfo.setHighestFrequency(highestMessageFrequency); + + if (highestMessageFrequency <= 1) { + // no replacement, as there are no messages with the frequency higher than 1 anymore + // (after the recalculation) + return; + } + + if (storedMessagesForMessageKey == null || + (storedMessagesForMessageKey.size() + dispersionQuota < highestMessageFrequency)) { + // either (1) message key is not present in stored 
messages yet, + // so the last stored message with the highest frequency is removed and this new message is added + // + // or (2) message key is present in stored messages, + // but the frequency of messages for this message key is much lower + // than the frequency of other stored messages, + // so the (last) stored message with the highest frequency is removed and this new message is added + storedMessages.get(messageKeyWithHighestFrequency).remove(highestMessageFrequency - 1); + storedMessages.merge(messageKey, List.of(message.getFullMessage()), ListUtils::union); + } + } + + private static String getMessageWithHighestCount(Map> storedMessages) { + return storedMessages.entrySet() + .stream() + .max((e1, e2) -> Integer.compare(e1.getValue().size(), e2.getValue().size())) + .map(Map.Entry::getKey) + .orElseThrow(); + } + } + + /** + * Abstraction of the stored messages for errors and warnings, which are stored during the processing of messages + * and then used to generate the final report. + * The messages are stored in the form of a map, where the key is the message_key and + * the value is the list of full messages stored for this message_key. + * The count represents the overall number of stored messages. 
+ * + * Example of message_key: "value [dc.date.available] is present multiple times" + * Example of the list of full messages: + * [ + * "value [dc.date.available] is present multiple times [[http://hdl.handle.net/123456789/2-7371]]", + * "value [dc.date.available] is present multiple times [[http://hdl.handle.net/123456789/2-7373]]", + * "value [dc.date.available] is present multiple times [[http://hdl.handle.net/123456789/2-7375]]" + * ] + * + */ + private static class StoredMessagesInfo { + private int count; + private int highestFrequency; + private final Map> storedMessages; + + StoredMessagesInfo() { + this.count = 0; + this.highestFrequency = Integer.MAX_VALUE; + storedMessages = new TreeMap<>(); + } + + public int getCount() { + return count; + } + + public int getHighestFrequency() { + return highestFrequency; + } + + public void setHighestFrequency(int highestFrequency) { + this.highestFrequency = highestFrequency; + } + + public Map> getStoredMessages() { + return storedMessages; + } + } + + private static class Message { + private final String messageKey; + private final String fullMessage; + + public Message(String messageKey, String fullMessage) { + this.messageKey = messageKey; + this.fullMessage = fullMessage; + } + + public String getMessageKey() { + return messageKey; + } + + public String getFullMessage() { + return fullMessage; + } + } +} diff --git a/dspace-api/src/main/resources/metadata-check-patterns.json b/dspace-api/src/main/resources/metadata-check-patterns.json new file mode 100644 index 000000000000..bcb4af25a6b3 --- /dev/null +++ b/dspace-api/src/main/resources/metadata-check-patterns.json @@ -0,0 +1,52 @@ +{ + "errors": { + "dc.type": [ + "^Does not have dc.type metadata", + "^dc.type has null value", + "^leading or trailing spaces", + "^empty value", + "^invalid type (" + ], + "dc.language": [ + "^dc.language.iso", + "^Invalid language code", + "^local.language.name" + ], + "dc.title": [ + "^Item has no dc.title metadata", + "^Title 
" + ], + "dc.relation": [ + "^contains 'dc.relation.", + "^the referenced item" + ], + "dc.rights": [ + "^has labels ", + "^There are bitstreams but incomplete rights metadata." + ], + "dc.description": [ + "^contains suspicious dc.description.uri metadata" + ], + "local.branding": [ + "^local.branding " + ], + "validation.duplicate.value": [ + "is present multiple times$" + ], + "validation.missing.handle": [ + "^Does not have a handle" + ], + "validation.empty.value": [ + " is empty$", + " is null$" + ], + "validation.complex.type": [ + " is a component with " + ] + }, + "warnings": { + "dc.subject": [ + "^does not contain any [dc.subject] values" + ] + } +} \ No newline at end of file diff --git a/dspace-api/src/main/resources/report-diff-fields.json b/dspace-api/src/main/resources/report-diff-fields.json index a1e1f687aefc..f4062d857b6a 100644 --- a/dspace-api/src/main/resources/report-diff-fields.json +++ b/dspace-api/src/main/resources/report-diff-fields.json @@ -23,7 +23,9 @@ "/checks/2/report/subscribers": "Subscribers", "/checks/2/report/subscribedCollections": "Subscribed Collections", "/checks/2/report/emptyGroups": "Empty Groups", - "/checks/3/report/licenses": "Licenses" + "/checks/3/report/licenses": "Licenses", + "/checks/5/report/errorCount": "Metadata Errors", + "/checks/5/report/warningCount": "Metadata Warnings" }, "fieldOrder": [ "/checks/0/report/directoryStats/0/size_bytes", @@ -49,6 +51,8 @@ "/checks/2/report/subscribers", "/checks/2/report/subscribedCollections", "/checks/2/report/emptyGroups", - "/checks/3/report/licenses" + "/checks/3/report/licenses", + "/checks/5/report/errorCount", + "/checks/5/report/warningCount" ] } \ No newline at end of file diff --git a/dspace-api/src/test/java/org/dspace/scripts/HealthReportIT.java b/dspace-api/src/test/java/org/dspace/scripts/HealthReportIT.java index e77a907ef731..8ae86af38339 100644 --- a/dspace-api/src/test/java/org/dspace/scripts/HealthReportIT.java +++ 
b/dspace-api/src/test/java/org/dspace/scripts/HealthReportIT.java @@ -8,6 +8,8 @@ package org.dspace.scripts; import static org.hamcrest.CoreMatchers.containsString; +import static org.hamcrest.CoreMatchers.is; +import static org.hamcrest.CoreMatchers.notNullValue; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.empty; import static org.hamcrest.Matchers.hasItem; @@ -20,6 +22,10 @@ import java.util.List; import java.util.Set; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ArrayNode; +import org.apache.commons.lang3.StringUtils; import org.dspace.AbstractIntegrationTestWithDatabase; import org.dspace.app.launcher.ScriptLauncher; import org.dspace.app.scripts.handler.impl.TestDSpaceRunnableHandler; @@ -31,6 +37,8 @@ import org.dspace.content.Collection; import org.dspace.content.Community; import org.dspace.content.Item; +import org.dspace.content.MetadataValue; +import org.dspace.content.ReportResult; import org.dspace.content.clarin.ClarinLicense; import org.dspace.content.clarin.ClarinLicenseLabel; import org.dspace.content.clarin.ClarinLicenseResourceMapping; @@ -38,10 +46,13 @@ import org.dspace.content.factory.ContentServiceFactory; import org.dspace.content.service.BitstreamService; import org.dspace.content.service.BundleService; +import org.dspace.content.service.ReportResultService; import org.dspace.content.service.clarin.ClarinLicenseLabelService; import org.dspace.content.service.clarin.ClarinLicenseResourceMappingService; import org.dspace.content.service.clarin.ClarinLicenseService; import org.dspace.core.Constants; +import org.dspace.services.ConfigurationService; +import org.dspace.services.factory.DSpaceServicesFactory; import org.junit.Test; /** @@ -139,4 +150,234 @@ public void testLicenseCheck() throws Exception { assertThat(messages, hasItem(containsString("UUIDs of items without license bundle:"))); 
assertThat(messages, hasItem(containsString("PUB"))); } + + @Test + public void testMetadataCheck() throws Exception { + context.turnOffAuthorisationSystem(); + + Community community = CommunityBuilder.createCommunity(context) + .withName("Community") + .build(); + + Collection collection = CollectionBuilder.createCollection(context, community) + .withName("Collection") + .withSubmitterGroup(eperson) + .build(); + + Item item1 = ItemBuilder.createItem(context, collection) + .withTitle("Test item 1") + .withType("corpus") + .withMetadata("local", "branding", null, "Community") + .build(); + + Item item2 = ItemBuilder.createItem(context, collection) + .withTitle("Test item 2") + .withType("toolService") + .withSubject("Test subject") + .withMetadata("local", "branding", null, "Community") + .withMetadata("dc", "relation", "replaces", findItemUri(item1)) + .build(); + + ItemBuilder.createItem(context, collection) + .withTitle("Test item 3") + .withType("toolService") + .withSubject("Test subject") + .withMetadata("local", "branding", null, "Community") + .withMetadata("dc", "relation", "isreplacedby", findItemUri(item2)) + .build(); + + ItemBuilder.createItem(context, collection) + .withTitle("Test item 4") + .withMetadata("local", "branding", null, "Community") + .build(); + + ItemBuilder.createItem(context, collection) + .withType("toolService") + .withMetadata("local", "branding", null, "Community") + .build(); + + TestDSpaceRunnableHandler testDSpaceRunnableHandler = new TestDSpaceRunnableHandler(); + + // with "health-report -c 5", only Metadata check is running + String[] args = new String[]{"health-report", "-c", "5"}; + ScriptLauncher.handleScript(args, ScriptLauncher.getConfig(kernelImpl), testDSpaceRunnableHandler, kernelImpl); + + assertThat(testDSpaceRunnableHandler.getErrorMessages(), empty()); + List messages = testDSpaceRunnableHandler.getInfoMessages(); + + assertThat(messages, hasSize(1)); + assertThat(messages.get(0), containsString("dc.relation 
issues: " + " ".repeat(15) + "2")); + assertThat(messages.get(0), containsString("dc.title issues: " + " ".repeat(15) + "1")); + assertThat(messages.get(0), containsString("dc.type issues: " + " ".repeat(15) + "1")); + assertThat(messages.get(0), containsString("Error count total: " + " ".repeat(15) + "4")); + assertThat(messages.get(0), containsString("dc.subject issues: " + " ".repeat(15) + "1")); + assertThat(messages.get(0), containsString("Warning count total: " + " ".repeat(15) + "1")); + assertThat(messages.get(0), containsString("Errors:")); + assertThat(messages.get(0), containsString("Does not have dc.type metadata")); + assertThat(messages.get(0), containsString("Item has no dc.title metadata")); + assertThat(messages.get(0), containsString("does not refer back via dc.relation.isreplacedby")); + assertThat(messages.get(0), containsString("does not refer back via dc.relation.replaces")); + assertThat(messages.get(0), containsString("Warnings:")); + assertThat(messages.get(0), containsString("does not contain any [dc.subject] values")); + + ReportResultService reportResultService = ContentServiceFactory.getInstance().getReportResultService(); + List reportResults = reportResultService.findAll(context); + ReportResult reportResult = findLastReportResult(reportResults); + assertThat(reportResult.getType(), is("healthcheck")); + + JsonNode root = new ObjectMapper().readTree(reportResult.getValue()); + JsonNode metadataCheckNode = findCheckByName(root, "Metadata check"); + assertThat(metadataCheckNode, notNullValue()); + + JsonNode reportNode = metadataCheckNode.get("report"); + assertThat(reportNode, notNullValue()); + + assertThat(reportNode.get("errorCount").asInt(), is(4)); + assertThat(reportNode.get("warningCount").asInt(), is(1)); + + ArrayNode errorsNode = reportNode.withArray("errors"); + assertThat(errorsNode.size(), is(3)); + + assertThat(errorsNode.get(0).get("count").asInt(), is(2)); + assertThat(errorsNode.get(0).get("type").asText(), 
is("dc.relation")); + + assertThat(errorsNode.get(1).get("count").asInt(), is(1)); + assertThat(errorsNode.get(1).get("type").asText(), is("dc.title")); + + assertThat(errorsNode.get(2).get("count").asInt(), is(1)); + assertThat(errorsNode.get(2).get("type").asText(), is("dc.type")); + + ArrayNode warningsNode = reportNode.withArray("warnings"); + assertThat(warningsNode.size(), is(1)); + assertThat(warningsNode.get(0).get("count").asInt(), is(1)); + assertThat(warningsNode.get(0).get("type").asText(), is("dc.subject")); + } + + @Test + public void testMetadataCheckWithRestrictedReportSize() throws Exception { + // set max-errors-to-show to 8 and error-dispersion-quota to 1, + // This test has 14 errors in total, but the report will contain only 8 error messages. + // The errors with low frequency will be prioritized. + // The error-dispersion-quota set to 1 means that the number of errors shown + // for each error will be almost the same, in this case maximally 2 errors for each error type + context.turnOffAuthorisationSystem(); + + ConfigurationService configurationService = DSpaceServicesFactory.getInstance().getConfigurationService(); + configurationService.setProperty("healthcheck.metadata.max-errors-to-show", 8); + configurationService.setProperty("healthcheck.metadata.error-dispersion-quota", 1); + + Community community = CommunityBuilder.createCommunity(context) + .withName("Community") + .build(); + + Collection collection = CollectionBuilder.createCollection(context, community) + .withName("Collection") + .withSubmitterGroup(eperson) + .build(); + + Item item1 = ItemBuilder.createItem(context, collection) + .withTitle("Test item 1") + .withType("corpus") + .withSubject("Test subject") + .withMetadata("local", "branding", null, "Community") + .build(); + + Item item2 = ItemBuilder.createItem(context, collection) + .withTitle("Test item 2") + .withType("toolService") + .withSubject("Test subject") + .withMetadata("local", "branding", null, "Community") + 
.withMetadata("dc", "relation", "replaces", findItemUri(item1)) + .build(); + + ItemBuilder.createItem(context, collection) + .withTitle("Test item 3") + .withType("toolService") + .withSubject("Test subject") + .withMetadata("local", "branding", null, "Community") + .withMetadata("dc", "relation", "isreplacedby", findItemUri(item2)) + .build(); + + // create 4 items with missing title + for (int i = 0; i < 4; i++) { + ItemBuilder.createItem(context, collection) + .withType("toolService") + .withSubject("Test subject") + .withMetadata("local", "branding", null, "Community") + .build(); + } + + // create 4 items with missing type + for (int i = 4; i < 8; i++) { + ItemBuilder.createItem(context, collection) + .withTitle("Test Item " + i) + .withSubject("Test subject") + .withMetadata("local", "branding", null, "Community") + .build(); + } + + // create 4 items with duplicate type + for (int i = 8; i < 12; i++) { + ItemBuilder.createItem(context, collection) + .withTitle("Test Item " + i) + .withType("toolService") + .withType("corpus") + .withSubject("Test subject") + .withMetadata("local", "branding", null, "Community") + .build(); + } + + TestDSpaceRunnableHandler testDSpaceRunnableHandler = new TestDSpaceRunnableHandler(); + + // with "health-report -c 5", only Metadata check is running + String[] args = new String[]{"health-report", "-c", "5"}; + ScriptLauncher.handleScript(args, ScriptLauncher.getConfig(kernelImpl), testDSpaceRunnableHandler, kernelImpl); + + assertThat(testDSpaceRunnableHandler.getErrorMessages(), empty()); + List messages = testDSpaceRunnableHandler.getInfoMessages(); + + assertThat(messages, hasSize(1)); + assertThat(messages.get(0), containsString("dc.relation issues: " + " ".repeat(15) + "2")); + assertThat(messages.get(0), containsString("dc.title issues: " + " ".repeat(15) + "4")); + assertThat(messages.get(0), containsString("dc.type issues: " + " ".repeat(15) + "4")); + assertThat(messages.get(0), containsString("duplicate value 
issues:" + " ".repeat(13) + "4")); + assertThat(messages.get(0), containsString("Error count total: " + " ".repeat(14) + "14")); + + assertThat(messages.get(0), containsString("Errors:")); + + // check if dc.type error is present exactly 2 times + assertThat(StringUtils.countMatches(messages.get(0), "Does not have dc.type metadata"), is(2)); + // check if dc.title error is present exactly 2 times + assertThat(StringUtils.countMatches(messages.get(0), "Item has no dc.title metadata"), is(2)); + // check if duplicate value error is present exactly 2 times + assertThat(StringUtils.countMatches(messages.get(0), "value [dc.type] is present multiple times"), is(2)); + + // check if all dc.relation errors are present + assertThat(StringUtils.countMatches(messages.get(0), "does not refer back via dc.relation.replaces"), is(1)); + assertThat( + StringUtils.countMatches(messages.get(0), "does not refer back via dc.relation.isreplacedby"), is(1)); + assertThat(messages.get(0), containsString("and more...")); + } + + private String findItemUri(Item item) { + return item.getMetadata().stream() + .filter(metadataValue -> "dc_identifier_uri".equals(metadataValue.getMetadataField().toString())) + .findFirst() + .map(MetadataValue::getValue) + .orElse(null); + } + + ReportResult findLastReportResult(List reportResults) { + return reportResults.stream().max((reportResult1, reportResult2) -> + reportResult1.getLastModified().compareTo(reportResult2.getLastModified())).orElseThrow(); + } + + JsonNode findCheckByName(JsonNode root, String checkName) { + for (JsonNode check : root.get("checks")) { + if (check.get("name").asText().equals(checkName)) { + return check; + } + } + return null; + } } \ No newline at end of file diff --git a/dspace/config/modules/healthcheck.cfg b/dspace/config/modules/healthcheck.cfg index 972051d94ace..027e30391e26 100644 --- a/dspace/config/modules/healthcheck.cfg +++ b/dspace/config/modules/healthcheck.cfg @@ -8,7 +8,8 @@ healthcheck.checks = General 
Information,\ Item summary,\ User summary,\ License summary,\ - Embargo check + Embargo check,\ + Metadata check plugin.named.org.dspace.health.Check = \ org.dspace.health.InfoCheck = General Information,\ @@ -18,7 +19,8 @@ plugin.named.org.dspace.health.Check = \ org.dspace.health.ItemCheck = Item summary,\ org.dspace.health.UserCheck = User summary,\ org.dspace.health.LogAnalyserCheck = Log Analyser Check,\ - org.dspace.health.LicenseCheck = License summary + org.dspace.health.LicenseCheck = License summary,\ + org.dspace.health.MetadataCheck = Metadata check # default value of the report from the last N days (where dates are applicable) healthcheck.last_n_days = 7