Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
135 changes: 50 additions & 85 deletions article/sources/xmlsps.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
from packtools.sps.models.kwd_group import ArticleKeywords
from packtools.sps.models.v2.article_toc_sections import ArticleTocSections
from packtools.sps.models.v2.related_articles import RelatedArticles
from packtools.sps.pid_provider.xml_sps_lib import XMLWithPre

from article import choices
from article.models import (
Expand All @@ -42,7 +41,7 @@
from issue.articlemeta.loader import load_issue_sections
from journal.models import Journal
from location.models import Location
from pid_provider.choices import PPXML_STATUS_DONE, PPXML_STATUS_INVALID
from pid_provider.choices import PPXML_STATUS_UNMATCHED_JOURNAL_OR_ISSUE, PPXML_STATUS_INVALID
from pid_provider.models import PidProviderXML
# Researcher no longer used - replaced by ContribPerson
# from researcher.models import Affiliation, Researcher
Expand Down Expand Up @@ -70,7 +69,7 @@ def add_error(errors, function_name, error, **kwargs):
errors.append(error_dict)


def load_article(user, xml=None, file_path=None, v3=None, pp_xml=None):
def load_article(user, pp_xml):
"""
Carrega um artigo a partir de XML.

Expand All @@ -80,10 +79,7 @@ def load_article(user, xml=None, file_path=None, v3=None, pp_xml=None):

Args:
user: Usuário responsável pela operação (obrigatório)
xml: String contendo o XML do artigo (opcional)
file_path: Caminho para o arquivo XML (opcional)
v3: PID v3 do artigo (opcional)
pp_xml: Objeto PidProviderXML relacionado (opcional)
pp_xml: Objeto PidProviderXML relacionado (obrigatório)

Returns:
Article: Instância do artigo processado com todos os relacionamentos
Expand All @@ -98,76 +94,65 @@ def load_article(user, xml=None, file_path=None, v3=None, pp_xml=None):
- O processamento continua mesmo com falhas parciais
- O campo article.valid indica se o processamento foi completo
"""
logging.info(f"load article {pp_xml} {v3} {file_path}")
errors = []
article = None # Inicializar no início
logging.info(f"load article {pp_xml}")
detail = {"pp_xml": str(pp_xml)}

# Validações iniciais
if not user:
raise ValueError("User is required")

if not any([pp_xml, v3, file_path, xml]):
if not pp_xml:
raise ValueError(
"load_article() requires params: pp_xml or v3 or file_path or xml"
"load_article() requires params: pp_xml"
)

if not pp_xml and v3:
try:
pp_xml = PidProviderXML.get_by_pid_v3(pid_v3=v3)
except PidProviderXML.DoesNotExist:
pp_xml = None

try:
if pp_xml:
xml_with_pre = pp_xml.xml_with_pre
elif file_path:
for xml_with_pre in XMLWithPre.create(file_path):
xmltree = xml_with_pre.xmltree
break
elif xml:
xml_with_pre = XMLWithPre("", etree.fromstring(xml))
xml_with_pre = pp_xml.xml_with_pre
except Exception as e:
exc_type, exc_value, exc_traceback = sys.exc_info()
UnexpectedEvent.create(
item=str(pp_xml or v3 or file_path or "xml"),
action="article.sources.xmlsps.load_article",
exception=e,
exc_traceback=exc_traceback,
detail=dict(
function="article.sources.xmlsps.load_article",
xml=f"{xml}",
v3=v3,
file_path=file_path,
pp_xml=str(pp_xml),
),
)
item = str(pp_xml or v3 or file_path or "xml")
if pp_xml:
pp_xml.proc_status = PPXML_STATUS_INVALID
pp_xml.save()
updated = (
Article.objects.filter(pid_v3=pp_xml.v3)
.exclude(
pp_xml=pp_xml,
data_status=choices.DATA_STATUS_INVALID,
)
.update(
pp_xml=pp_xml,
data_status=choices.DATA_STATUS_INVALID,
)
updated = (
Article.objects.filter(pp_xml=pp_xml)
.exclude(
data_status=choices.DATA_STATUS_INVALID,
)
raise ValueError(f"Unable to get XML to load article from {item}: {e}")
.update(
data_status=choices.DATA_STATUS_INVALID,
)
)
errors = [
{
"function": "load_article",
"error_type": e.__class__.__name__,
"error_message": str(e),
"timestamp": datetime.now().isoformat(),
}
]
pp_xml.add_event(name="load_article", proc_status=PPXML_STATUS_INVALID, detail=detail, errors=errors, exceptions=e)
raise ValueError(f"Unable to get XML to load article from {pp_xml}: {e}")

pid_v3 = v3 or xml_with_pre.v3

try:
# Sequência organizada para atribuição de campos do Article
# Do mais simples (campos diretos) para o mais complexo (FKs e M2M)
errors = []
article = None
event = None

xmltree = xml_with_pre.xmltree

pid_v3 = xml_with_pre.v3
sps_pkg_name = xml_with_pre.sps_pkg_name
logging.info(f"Article {pid_v3} {sps_pkg_name}")

logging.info(f"Pid Provider XML: {pid_v3} {sps_pkg_name}")

journal = get_journal(xmltree=xmltree, errors=errors)
if not journal:
raise ValueError(f"Not found journal for pid provider xml: {pid_v3} {sps_pkg_name}")
issue = get_issue(
xmltree=xmltree,
journal=journal,
item=pid_v3,
errors=errors,
)
if not issue:
raise ValueError(f"Not found issue for pid provider xml: {pid_v3} {sps_pkg_name}")

# CRIAÇÃO/OBTENÇÃO DO OBJETO PRINCIPAL
article = Article.create_or_update(
Expand Down Expand Up @@ -201,19 +186,9 @@ def load_article(user, xml=None, file_path=None, v3=None, pp_xml=None):
)

# FOREIGN KEYS SIMPLES
article.journal = get_journal(xmltree=xmltree, errors=errors)
if not article.journal:
article.save()
raise ValueError(f"Not found journal for article: {pid_v3}")
article.issue = get_issue(
xmltree=xmltree,
journal=article.journal,
item=pid_v3,
errors=errors,
)
if not article.issue:
article.save()
raise ValueError(f"Not found issue for article: {pid_v3}")
article.journal = journal
article.issue = issue
article.save()

# Salvar uma vez após definir todos os campos simples
logging.info(
Expand Down Expand Up @@ -286,19 +261,9 @@ def load_article(user, xml=None, file_path=None, v3=None, pp_xml=None):
if event:
event.finish(errors=errors, exceptions=traceback.format_exc())
raise
UnexpectedEvent.create(
item=str(pp_xml or v3 or file_path or "xml"),
action="article.sources.xmlsps.load_article",
exception=e,
exc_traceback=exc_traceback,
detail=dict(
function="article.sources.xmlsps.load_article",
xml=f"{xml}",
v3=v3,
file_path=file_path,
pp_xml=str(pp_xml),
),
)

pp_xml.add_event(name="load_article", proc_status=PPXML_STATUS_UNMATCHED_JOURNAL_OR_ISSUE, detail=detail, errors=errors, exceptions=e)

raise


Expand Down
3 changes: 3 additions & 0 deletions pid_provider/choices.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
# Values stored in PidProviderXML.proc_status. The migration in this change
# declares that field with max_length=7, so every value here must be at most
# 7 characters long.
PPXML_STATUS_DONE = "DONE"
PPXML_STATUS_UNDEF = "UNDEF"
# NOTE(review): the stored value is "NVALID" (no leading "I"); presumably kept
# as-is for compatibility with rows already saved - confirm before changing.
PPXML_STATUS_INVALID = "NVALID"
# "UNMATCH" is exactly 7 characters, i.e. it uses the full width of the field.
# It is the status passed to PidProviderXML.add_event by the failure handler
# of article.sources.xmlsps.load_article.
PPXML_STATUS_UNMATCHED_JOURNAL_OR_ISSUE = "UNMATCH"

PPXML_STATUS_DUPLICATED = "DUP"
PPXML_STATUS_DEDUPLICATED = "DEDUP"
PPXML_STATUS = (
Expand All @@ -19,4 +21,5 @@
(PPXML_STATUS_INVALID, _("invalid")),
(PPXML_STATUS_DUPLICATED, _("duplicated")),
(PPXML_STATUS_DEDUPLICATED, _("deduplicated")),
(PPXML_STATUS_UNMATCHED_JOURNAL_OR_ISSUE, _("unmatched journal or issue")),
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
# Generated by Django 5.2.7 on 2026-03-26 20:52

import django.db.models.deletion
import modelcluster.fields
from django.conf import settings
from django.db import migrations, models


# Schema migration for the pid_provider app. It performs two operations:
#   1. redeclares PidProviderXML.proc_status so that its choices include
#      ("UNMATCH", "unmatched journal or issue");
#   2. creates the XMLEvent model, whose rows hang off a PidProviderXML
#      through the "ppxml" parental key (related_name="events").
class Migration(migrations.Migration):

# Must run after pid_provider migration 0015 and after the project's
# (swappable) user model exists, because XMLEvent has foreign keys to it.
dependencies = [
("pid_provider", "0015_alter_xmlversion_file_xmlurl"),
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
]

operations = [
# Operation 1: proc_status with the full list of allowed status values.
migrations.AlterField(
model_name="pidproviderxml",
name="proc_status",
field=models.CharField(
blank=True,
choices=[
("TODO", "To do"),
("DONE", "Done"),
("WAIT", "waiting"),
("IGNORE", "ignore"),
("UNDEF", "undefined"),
("NVALID", "invalid"),
("DUP", "duplicated"),
("DEDUP", "deduplicated"),
("UNMATCH", "unmatched journal or issue"),
],
default="TODO",
# "UNMATCH" is 7 characters long, so it exactly fits this limit.
max_length=7,
null=True,
verbose_name="processing status",
),
),
# Operation 2: table for the XMLEvent model.
migrations.CreateModel(
name="XMLEvent",
fields=[
(
"id",
models.BigAutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
(
# auto_now=True: refreshed on every save.
"updated",
models.DateTimeField(
auto_now=True, verbose_name="Last update date"
),
),
("name", models.CharField(max_length=200, verbose_name="name")),
("detail", models.JSONField(blank=True, null=True)),
(
# auto_now_add=True: set once, when the row is first created.
"created",
models.DateTimeField(
auto_now_add=True, verbose_name="Creation date"
),
),
("completed", models.BooleanField(default=False)),
(
# SET_NULL keeps the event row when the creating user is deleted.
"creator",
models.ForeignKey(
editable=False,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
related_name="%(class)s_creator",
to=settings.AUTH_USER_MODEL,
verbose_name="Creator",
),
),
(
# CASCADE: events are deleted together with their PidProviderXML.
# The parent reaches them through the "events" related name.
"ppxml",
modelcluster.fields.ParentalKey(
on_delete=django.db.models.deletion.CASCADE,
related_name="events",
to="pid_provider.pidproviderxml",
),
),
(
# SET_NULL keeps the event row when the updating user is deleted.
"updated_by",
models.ForeignKey(
blank=True,
editable=False,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
related_name="%(class)s_last_mod_user",
to=settings.AUTH_USER_MODEL,
verbose_name="Updater",
),
),
],
options={
"abstract": False,
},
),
]
45 changes: 44 additions & 1 deletion pid_provider/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
zero_to_none,
QueryBuilderPidProviderXML,
)
from tracker.models import BaseEvent, EventSaveError, UnexpectedEvent
from tracker.models import BaseEvent, UnexpectedEvent

try:
from django_prometheus.models import ExportModelOperationsMixin
Expand Down Expand Up @@ -441,12 +441,16 @@ class PidProviderXML(BasePidProviderXML, CommonControlField, ClusterableModel):
FieldPanel("z_links"),
FieldPanel("z_partial_body"),
]
panels_event = [
InlinePanel("events", label=_("Events")),
]

edit_handler = TabbedInterface(
[
ObjectList(panel_a, heading=_("Identification")),
ObjectList(panel_b, heading=_("Other PIDs")),
ObjectList(panel_c, heading=_("Data")),
ObjectList(panels_event, heading=_("Events")),
]
)

Expand Down Expand Up @@ -1447,6 +1451,11 @@ def fix_pkg_name(self, pkg_name):
self.save()
return True
return False

def add_event(self, name, proc_status, detail=None, errors=None, exceptions=None):
    """
    Set the processing status of this record and log an event for it.

    The new status is saved on the record before the event is registered.

    Args:
        name: Name given to the event.
        proc_status: Value assigned to ``self.proc_status``.
        detail: Optional data forwarded to ``XMLEvent.register``.
        errors: Optional errors forwarded to ``XMLEvent.register``.
        exceptions: Optional exception data forwarded to ``XMLEvent.register``.

    Returns:
        The object returned by ``XMLEvent.register``.
    """
    # Persist the status change first.
    self.proc_status = proc_status
    self.save()

    # Then attach the event to this record.
    event = XMLEvent.register(
        ppxml=self,
        name=name,
        detail=detail,
        errors=errors,
        exceptions=exceptions,
    )
    return event


class FixPidV2(CommonControlField):
Expand Down Expand Up @@ -1748,3 +1757,37 @@ def save_file(self, xml_content, filename=None):
except Exception as e:
logging.error(f"Error saving zip file for XMLURL {self.url}: {e}")
return False


class XMLEvent(BaseEvent, CommonControlField):
    """
    Event log entry attached to a PidProviderXML record.

    Each entry points to its parent record through ``ppxml``; the parent
    reaches its entries through the ``events`` related name.

    Attributes:
        ppxml (ParentalKey): Parent PidProviderXML. Entries are deleted
            together with it (``on_delete=models.CASCADE``).

    The remaining columns created by the migration for this model (``name``,
    ``detail``, ``created``, ``completed``, ``creator``, ``updated``,
    ``updated_by``) are not declared in this class; presumably they come from
    BaseEvent and CommonControlField - verify in those base classes.

    Methods:
        register (classmethod): Builds an entry for a given record and hands
            it to ``finish``.
    """

    ppxml = ParentalKey(
        PidProviderXML,
        related_name="events",
        on_delete=models.CASCADE,
    )

    @classmethod
    def register(cls, ppxml, name, detail=None, errors=None, exceptions=None):
        """
        Build an event for ``ppxml`` and finish it in one step.

        Args:
            ppxml: PidProviderXML record the event belongs to.
            name: Name of the event.
            detail: Optional data passed on to ``finish``.
            errors: Optional errors passed on to ``finish``.
            exceptions: Optional exception data passed on to ``finish``.

        Returns:
            The new XMLEvent instance, after ``finish`` was called on it.
            NOTE(review): ``finish`` is not defined in this class; presumably
            it is inherited and is what saves the entry - confirm.
        """
        event = cls()
        event.ppxml = ppxml
        event.name = name

        # The event counts as completed only when neither errors nor
        # exceptions were reported.
        succeeded = not (errors or exceptions)
        event.finish(
            completed=succeeded,
            detail=detail,
            errors=errors,
            exceptions=exceptions,
        )
        return event
Loading