diff --git a/issue/formats/articlemeta_format.py b/issue/formats/articlemeta_format.py index 690aa5ab..1a5f8886 100644 --- a/issue/formats/articlemeta_format.py +++ b/issue/formats/articlemeta_format.py @@ -1,5 +1,4 @@ from collections import defaultdict -from functools import lru_cache from article.models import Article from core.utils.articlemeta_dict_utils import ( @@ -10,6 +9,7 @@ from journal.formats.articlemeta_format import ArticlemetaJournalFormatter from journal.models import SciELOJournal, TitleInDatabase + def get_issue_type(issue): if issue.supplement: return "supplement" @@ -32,7 +32,16 @@ def __init__(self, obj, collection): self.journal = self.obj.journal self._scielo_journal = None self._medline_titles = None - self.article = Article.objects.filter(issue=self.obj, journal=self.journal) + self._article_qs = None + + @property + def article_qs(self): + """Lazy queryset — só executa query quando necessário.""" + if self._article_qs is None: + self._article_qs = Article.objects.filter( + issue=self.obj, journal=self.journal + ) + return self._article_qs @property def scielo_journal(self): @@ -49,13 +58,15 @@ def scielo_journal(self): return self._scielo_journal @property - @lru_cache(maxsize=1) def medline_titles(self): - return list( - TitleInDatabase.objects.filter( - journal=self.journal, indexed_at__acronym__iexact="medline" + """Cache manual em vez de lru_cache (evita memory leak em instância).""" + if self._medline_titles is None: + self._medline_titles = list( + TitleInDatabase.objects.filter( + journal=self.journal, indexed_at__acronym__iexact="medline" + ) ) - ) + return self._medline_titles def format(self): """Formata todos os dados do issue""" @@ -85,8 +96,6 @@ def format(self): def _format_basic_info(self): """Informações básicas do issue""" - # Path to base issue - add_multiple_to_result( { "v31": self.obj.volume, @@ -100,7 +109,6 @@ def _format_basic_info(self): }, self.result["issue"], ) - # "v6": Ordem de publicação dos fascículos para apresentação na interface if hasattr(self.obj, "issue_title"): items = [item.title for item in self.obj.issue_title.all() if item.title] @@ -214,7 +222,6 @@ def _format_title_in_database(self): def _format_metadata(self): """Metadados e relacionamentos""" - # tem que ser objeto datetime processing_date = self.obj.updated.strftime("%Y-%m-%d") key_to_code = { @@ -252,8 +259,12 @@ def _format_register_order_info(self): def _format_field_use_system(self): """Campo usado no sistema""" - if self.scielo_journal: - field_value = f"{self.scielo_journal.journal_acron.upper()}{self.obj.volume}{self.obj.number}" + if self.scielo_journal and self.scielo_journal.journal_acron: + field_value = ( + f"{self.scielo_journal.journal_acron.upper()}" + f"{self.obj.volume or ''}" + f"{self.obj.number or ''}" + ) add_to_result("v888", field_value, self.result["issue"]) def _format_legend_bibliographic(self): @@ -278,10 +289,8 @@ def _format_legend_bibliographic(self): "a": self.obj.year, "_": "", } - # Só adiciona 'v' se houver volume if self.obj.volume: entry["v"] = f"vol.{self.obj.volume}" - # Só adiciona 'n' se houver number if self.obj.number: entry["n"] = f"no.{self.obj.number}" if self.obj.season: @@ -308,28 +317,37 @@ def _format_title_summary(self): ] def _format_article_info(self): - """Informações de artigo""" - if self.article.exists(): - article_count = str(self.obj.article_set.count()) + """Informações de artigo — usa o queryset consistente (filtrado por issue + journal).""" + if self.article_qs.exists(): + article_count = str(self.article_qs.count()) add_to_result("v122", article_count, self.result["issue"]) def _format_issn_info(self): - """Informações de edição""" - if self.scielo_journal and self.scielo_journal.journal and self.scielo_journal.journal.official: - issn_print = self.scielo_journal.journal.official.issn_print - issn_electronic = self.scielo_journal.journal.official.issn_electronic - issn_scielo = self.scielo_journal.issn_scielo - add_multiple_to_result( - { - "v35": issn_scielo, - "v935": issn_electronic, - }, - self.result["issue"], - ) + """Informações de ISSN""" + if not ( + self.scielo_journal + and self.scielo_journal.journal + and self.scielo_journal.journal.official + ): + return + + official = self.scielo_journal.journal.official + issn_print = official.issn_print + issn_electronic = official.issn_electronic + issn_scielo = self.scielo_journal.issn_scielo + + add_multiple_to_result( + { + "v35": issn_scielo, + "v435": None, # preenchido abaixo por _format_issn_with_type + "v935": issn_electronic, + }, + self.result["issue"], + ) - self._format_issn_with_type(issn_print, issn_electronic) - self._format_issn_code_title(issn_print, issn_electronic) - self._format_code(issn_scielo) + self._format_issn_with_type(issn_print, issn_electronic) + self._format_issn_code_title(issn_print, issn_electronic) + self._format_code(issn_scielo) def _format_code(self, issn_scielo): """Informações de código""" @@ -346,7 +364,8 @@ def _format_issn_with_type(self, issn_print, issn_electronic): issns.append({"_": issn_print, "t": "PRINT"}) if issn_electronic: issns.append({"_": issn_electronic, "t": "ONLIN"}) - self.result["issue"]["v435"] = issns + if issns: + self.result["issue"]["v435"] = issns def _format_issn_code_title(self, issn_print, issn_electronic): self.result["code_title"] = [ @@ -355,10 +374,18 @@ def _format_issn_code_title(self, issn_print, issn_electronic): def _format_code_sections(self): data = [] - for toc in self.obj.table_of_contents.select_related("journal_toc__language").all(): + for toc in self.obj.table_of_contents.select_related( + "journal_toc", "journal_toc__language" + ).all(): journal_toc = toc.journal_toc + if not journal_toc: + continue code = getattr(journal_toc, "code", None) - lang = getattr(journal_toc.language, "code2", None) if journal_toc.language else None + lang = ( + getattr(journal_toc.language, "code2", None) + if journal_toc.language + else None + ) text = journal_toc.text if code: item = {"c": code, "_": ""} @@ -379,4 +406,4 @@ def get_articlemeta_format_issue(obj, collection): data["title"] = ArticlemetaJournalFormatter(obj.journal, collection).format() formatter_issue = ArticlemetaIssueFormatter(obj, collection).format() data.update(formatter_issue) - return data + return data \ No newline at end of file diff --git a/journal/formats/articlemeta_format.py b/journal/formats/articlemeta_format.py index 9622f566..8ee77780 100644 --- a/journal/formats/articlemeta_format.py +++ b/journal/formats/articlemeta_format.py @@ -1,5 +1,4 @@ from collections import defaultdict -from functools import lru_cache from core.utils.articlemeta_dict_utils import add_items, add_to_result from journal.models import SciELOJournal, TitleInDatabase @@ -7,13 +6,13 @@ class ArticlemetaJournalFormatter: """Formatador para dados do ArticleMeta""" - + def __init__(self, obj, collection): self.obj = obj self.collection = collection self.result = defaultdict(list) self._scielo_journal = None - self._medline_titles = None + self._titles_in_database_medline_secs = None self.official = getattr(self.obj, 'official', None) @property @@ -29,13 +28,18 @@ def scielo_journal(self): return self._scielo_journal @property - @lru_cache(maxsize=1) def titles_in_database_medline_secs(self): - titles_in_db = TitleInDatabase.objects.filter( + """Cache manual (mesmo padrão de scielo_journal) para evitar problemas do lru_cache em instância.""" + if self._titles_in_database_medline_secs is not None: + return self._titles_in_database_medline_secs + + self._titles_in_database_medline_secs = list( + TitleInDatabase.objects.filter( journal=self.obj, - indexed_at__acronym__in=["medline", "secs"] + indexed_at__acronym__in=["medline", "secs"], ).select_related("indexed_at") - return titles_in_db + ) + return self._titles_in_database_medline_secs def format(self): """Formata todos os dados do journal""" @@ -56,10 +60,10 @@ def format(self): self._format_collection_info, self._format_journal_history, ] - + for formatter in formatters: formatter() - + return dict(self.result) def _format_basic_info(self): @@ -86,34 +90,36 @@ def _format_basic_info(self): for key, value in simple_fields.items(): add_to_result(key, value, self.result) - if acronym := getattr(self.obj.vocabulary, 'acronym', None): + if acronym := getattr(self.obj.vocabulary, 'acronym', None): add_to_result("v85", acronym, self.result) - + if license := getattr(self.obj.journal_use_license, 'license_type', None): add_to_result("v541", license, self.result) - add_items("v64", [e.email for e in self.obj.journal_email.all()], self.result) - add_to_result("v117", self.obj.standard.code if self.obj.standard and self.obj.standard.code else None, self.result) + add_items("v64", [e.email for e in self.obj.journal_email.all()], self.result) + add_to_result( + "v117", + self.obj.standard.code if self.obj.standard and self.obj.standard.code else None, + self.result, + ) add_items("v350", [lang.code2 for lang in self.obj.text_language.all()], self.result) add_items("v360", [lang.code2 for lang in self.obj.abstract_language.all()], self.result) add_items("v900", [annotation.notes for annotation in self.obj.notes.all()], self.result) def _format_contact_address_info(self): address = self.obj.contact_address - try: + if address: add_items("v63", address.split("\n"), self.result) - except Exception as e: - add_to_result("v63", address, self.result) def _format_title_journal_info(self): - """Informações do Title Journalal""" + """Informações do Title Journal""" add_to_result("v150", self.obj.short_title, self.result) - if iso_short_title := getattr(self.obj.official, 'iso_short_title', None): + if iso_short_title := getattr(self.official, 'iso_short_title', None): add_to_result("v151", iso_short_title, self.result) - - if parallel_titles := getattr(self.official, 'parallel_titles', None): + + if parallel_titles := getattr(self.official, 'parallel_titles', None): add_items("v230", [pt.text for pt in parallel_titles if pt.text], self.result) - + add_items("v240", [other_title.title for other_title in self.obj.other_titles.all()], self.result) if self.official: add_items("v610", [old_title.title for old_title in self.official.old_title.all()], self.result) @@ -123,25 +129,23 @@ def _format_title_journal_info(self): def _format_collection_info(self): if self.scielo_journal and self.scielo_journal.collection: collection = self.scielo_journal.collection - if collection: - acron3 = collection.acron3 - self.result["collection"] = acron3 - add_to_result("v690", collection.domain, self.result) - add_to_result("v992", collection.acron3, self.result) + self.result["collection"] = collection.acron3 + add_to_result("v690", collection.domain, self.result) + add_to_result("v992", collection.acron3, self.result) def _format_scielo_journal_info(self): """Informações do SciELO Journal""" if self.scielo_journal: issn_scielo = self.scielo_journal.issn_scielo journal_acron = self.scielo_journal.journal_acron - key_to_issn = { - "v50": self.scielo_journal.status if self.scielo_journal.status else None, + key_to_value = { + "v50": self.scielo_journal.status or None, "v68": journal_acron, "v400": issn_scielo, "v880": issn_scielo, - "v930": journal_acron.upper(), + "v930": journal_acron.upper() if journal_acron else None, } - for key, value in key_to_issn.items(): + for key, value in key_to_value.items(): add_to_result(key, value, self.result) self.result["code"] = issn_scielo @@ -153,7 +157,7 @@ def _format_publication_info(self): add_to_result("v301", self.official.initial_year, self.result) add_to_result("v302", self.official.initial_volume, self.result) add_to_result("v303", self.official.initial_number, self.result) - + year = self.official.terminate_year month = self.official.terminate_month @@ -166,49 +170,57 @@ def _format_publication_info(self): add_to_result("v306", self.official.final_number, self.result) def _format_publisher_info(self): - """Informações do owner""" + """Informações do publisher/owner""" try: - # Deixa preparado para tornar obsoleto o owner_history no modelo Journal owner_data = self.obj.owner_data except AttributeError: owner_data = {} - owners = list(self.obj.owner_history.select_related( - 'institution__institution', 'institution__institution__location' - ).all()) - for p in owners: - owner_data["country_acronym"] = p.institution_country_acronym - owner_data["state_acronym"] = p.institution_state_acronym - owner_data["city_name"] = p.institution_city_name - break + first_owner = ( + self.obj.owner_history + .select_related( + 'institution__institution', + 'institution__institution__location', + ) + .first() + ) + if first_owner: + owner_data["country_acronym"] = first_owner.institution_country_acronym + owner_data["state_acronym"] = first_owner.institution_state_acronym + owner_data["city_name"] = first_owner.institution_city_name + add_items("v310", [owner_data.get("country_acronym")], self.result) add_items("v320", [owner_data.get("state_acronym")], self.result) add_items("v480", self.obj.owner_names, self.result) add_items("v490", [owner_data.get("city_name")], self.result) - + def _format_copyright_holder_info(self): """Informações do copyright holder""" - # Primeiro tenta buscar do novo modelo JournalOrganization copyright_holders = self.obj.copyright_holders if copyright_holders: add_items("v62", copyright_holders, self.result) def _format_sponsor_info(self): """Informações do sponsor""" - # Primeiro tenta buscar do novo modelo JournalOrganization sponsors = self.obj.sponsors if sponsors: add_items("v140", sponsors, self.result) def _format_indexing_info(self): """Informações de indexação""" - # secs codes titles_in_db = self.titles_in_database_medline_secs - medline_data = [t for t in titles_in_db if t.indexed_at.acronym.lower() == "medline"] - secs_data = [t for t in titles_in_db if t.indexed_at.acronym.lower() == "secs"] + + medline_data = [] + secs_data = [] + for t in titles_in_db: + acronym_lower = t.indexed_at.acronym.lower() + if acronym_lower == "medline": + medline_data.append(t) + elif acronym_lower == "secs": + secs_data.append(t) + add_items("v37", [sc.identifier for sc in secs_data if sc.identifier], self.result) - title_medline = [m.title for m in medline_data] add_items("v420", [m.identifier for m in medline_data], self.result) - add_items("v421", title_medline, self.result) + add_items("v421", [m.title for m in medline_data], self.result) indexeds_standard = [idx.name for idx in self.obj.indexed_at.all()] additional_indexed_at = [idx.name for idx in self.obj.additional_indexed_at.all()] @@ -238,7 +250,6 @@ def _format_metadata(self): add_to_result("v942", created, self.result) add_to_result("v943", updated, self.result) - # tem que ser objeto datetime self.result["processing_date"] = self.obj.updated.strftime('%Y-%m-%d') self.result["created_at"] = self.obj.created.strftime('%Y-%m-%d') @@ -253,18 +264,22 @@ def _format_issn_info(self): self._format_issn_type(issn_print) def _format_issn_list(self, issn_print, issn_electronic): - if self.official: - issns = [issn for issn in [issn_print, issn_electronic] if issn] - self.result['issns'].extend(issns) - + issns = [issn for issn in [issn_print, issn_electronic] if issn] + self.result['issns'].extend(issns) + def _format_issn_type(self, issn_print): + """ + ATENÇÃO: revisar a lógica de negócio. + No código original, se issn_print == issn_scielo, atribuía 'ONLIN', + o que parece invertido. Mantido o comportamento original aqui, + mas marcado para revisão. + """ if self.scielo_journal: if issn_print == self.scielo_journal.issn_scielo: - type_issn = 'ONLIN' - add_to_result("v35", type_issn, self.result) + # TODO: verificar se deveria ser "PRINT" em vez de "ONLIN" + add_to_result("v35", "ONLIN", self.result) else: - type_issn = "PRINT" - add_to_result("v35", type_issn, self.result) + add_to_result("v35", "PRINT", self.result) def _format_issn_with_type(self, issn_print, issn_electronic): issns = [] @@ -281,51 +296,53 @@ def _format_subject_areas_info(self): def _format_mission_info(self): if not hasattr(self.obj, 'mission') or not self.obj.mission.exists(): return - + missions_data = [] for mission in self.obj.mission.select_related('language'): if mission.language and mission.get_text_pure: missions_data.append({ "l": mission.language.code2, - "_": mission.get_text_pure + "_": mission.get_text_pure, }) - + if missions_data: self.result["v901"] = missions_data - - def _former_dict_journal_history(self, subfield_a, subfield_b): - dict_a = { + + def _format_journal_history_entry(self, subfield_a, subfield_b): + """Formata um registro de histórico do journal.""" + entry = { "_": "", "a": subfield_a, - "b": "C" + "b": "C", } if subfield_b: - dict_a.update({"d": subfield_b}) - return dict_a + entry["d"] = subfield_b + return entry def _format_journal_history(self): - if self.scielo_journal: - journal_history = self.scielo_journal.journal_history.all() - subfields = [] + if not self.scielo_journal: + return + + journal_history = self.scielo_journal.journal_history.all() + subfields = [] + + for jh in journal_history: + subfield_a = f"{jh.year}{jh.month}{jh.day or '01'}" + + # subfield_b determinado exclusivamente pelo evento atual subfield_b = "" - for jh in journal_history: - subfield_a = f"{jh.year}{jh.month}{jh.day or '01'}" - if jh.interruption_reason: - subfield_b = "D" if jh.interruption_reason == "ceased" else "S" - if jh.event_type == "ADMITTED": - dict_subfield =self._former_dict_journal_history( - subfield_a=subfield_a, - subfield_b=subfield_b, - ) - subfields.append(dict_subfield) - elif jh.event_type == "INTERRUPTED": - dict_subfield = self._former_dict_journal_history( - subfield_a=subfield_a, - subfield_b=subfield_b, - ) - subfields.append(dict_subfield) - - self.result["v51"] = subfields + if jh.interruption_reason: + subfield_b = "D" if jh.interruption_reason == "ceased" else "S" + + if jh.event_type in ("ADMITTED", "INTERRUPTED"): + entry = self._format_journal_history_entry( + subfield_a=subfield_a, + subfield_b=subfield_b, + ) + subfields.append(entry) + + self.result["v51"] = subfields + def get_articlemeta_format_title(obj, collection): formatter = ArticlemetaJournalFormatter(obj, collection)