Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
d3062b7
SDEV-5193 - lint - match.py - eliminate mypy type errors.
dustinbleile Nov 12, 2025
592d28f
SDEV-5193 - lint - inputs.py - remove mypy errors - IprCopyVariant ha…
dustinbleile Nov 12, 2025
24dec89
SDEV-5193 - lint - test_inputs.py - mypy type fixes - use Sequence in…
dustinbleile Nov 12, 2025
e6655ec
SDEV-5193 - lint - ipr.py - excluded variable - type consistency
dustinbleile Nov 12, 2025
5cdc11a
SDEV-5193 - lint - annotate.py - annotate_signature_variants - return…
dustinbleile Nov 12, 2025
f0f68fa
SDEV-5193 - lint - annotate_positional_variants - ignore type error f…
dustinbleile Nov 12, 2025
cd67aa8
SDEV-5193 - lint - mypy - annoate.py - resolve KbMatch types vs Hasha…
dustinbleile Nov 12, 2025
1639e83
SDEV-5193 - lint - mypy - ipr.py - use Cast to reduce type errors
dustinbleile Nov 12, 2025
0ee36c7
SDEV-5193 - refactor typing - fixes for mypy typing checks
dustinbleile Nov 14, 2025
91eb350
Merge branch 'develop' of https://github.com/bcgsc/pori_python into i…
dustinbleile Nov 17, 2025
1a9f614
Merge branch 'develop' of https://github.com/bcgsc/pori_python into i…
dustinbleile Nov 17, 2025
4261381
Merge branch 'improvement/SDEV-5193_mypy_type_fixes' of https://githu…
dustinbleile Nov 17, 2025
b209e23
DEVSU-2797 - main.ipr_report - allow null ipr_url if ipr_upload = False
dustinbleile Dec 12, 2025
e95203c
DEVSU-2797 - lint isort - unsorted imports
dustinbleile Dec 12, 2025
0f7b2d7
DEVSU-2797 - improve error message logging when graphkb_conn is made …
dustinbleile Dec 12, 2025
ecb9060
bugfix - ipr_report - upload_json immediately returns
dustinbleile Dec 16, 2025
b572f2b
DEVSU-2797 - check os.environ for IPR_URL and GRAPHKB_URL
dustinbleile Dec 16, 2025
9cd51c3
lint - fix fstring
dustinbleile Dec 16, 2025
b051b27
Merge branch 'develop' of https://github.com/bcgsc/pori_python into f…
dustinbleile Jan 7, 2026
e05ec5b
minor type fix - null string instead of None
dustinbleile Jan 7, 2026
57f97f9
minor lint - isort inputs & quotes
dustinbleile Jan 7, 2026
a48b090
test_genes - remove unused import PREFERRED_GENE_SOURCE_NAME
dustinbleile Jan 7, 2026
87cd9ff
raise errors if no graphkb url has been set
dustinbleile Jan 7, 2026
686511d
Error message when no IPR_URL defined
dustinbleile Jan 7, 2026
abcc23e
Merge branch 'feat/DEVSU-2797-remove-gsc-defaults_dustin' of https://…
dustinbleile Jan 8, 2026
2c39230
IPR_URL - typing fix - only string
dustinbleile Jan 8, 2026
3682a6d
lint - inputs.py - fix typing warnings
dustinbleile Jan 8, 2026
9255bf0
SDEV-5193 - type fix - KbMatchSections['KbMatchedVariants'] -> KbMatc…
dustinbleile Jan 8, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions pori_python/graphkb/match.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,15 +315,15 @@ def equivalent_types(

# Convert rid to displayName if needed
if looks_like_rid(type1):
type1 = conn.get_records_by_id([type1])[0]["displayName"]
type1 = str(conn.get_records_by_id([type1])[0].get("displayName", type1))
if looks_like_rid(type2):
type2 = conn.get_records_by_id([type2])[0]["displayName"]
type2 = str(conn.get_records_by_id([type2])[0].get("displayName", type2))

# Get type terms from observed variant
terms1 = []
terms1 = set()
if strict:
try:
terms1.append(get_term_by_name(conn, type1)["@rid"])
terms1.add(get_term_by_name(conn, type1)["@rid"])
except Exception:
pass
else:
Expand Down
13 changes: 10 additions & 3 deletions pori_python/graphkb/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def cache_key(request_body) -> str:
class GraphKBConnection:
def __init__(
self,
url: str = os.environ.get("GRAPHKB_URL"),
url: str = os.environ.get("GRAPHKB_URL", ""),
username: str = "",
password: str = "",
use_global_cache: bool = True,
Expand Down Expand Up @@ -143,6 +143,8 @@ def request(self, endpoint: str, method: str = "GET", **kwargs) -> Dict:
Returns:
dict: the json response as a python dict
"""
if not self.url:
raise ValueError("no GraphKBConnection url set - cannot make a login demo")
url = join_url(self.url, endpoint)
self.request_count += 1
connect_timeout = 7
Expand Down Expand Up @@ -222,6 +224,8 @@ def login_demo(self) -> None:
1. get a first token from KeyCloak using username and password; self.login_demo()
2. get a second token from the GraphKB API using keyCloakToken; self.login()
"""
if not self.url:
raise ValueError("no GraphKBConnection url set - cannot make a login demo")
url_parts = urlsplit(self.url)
base_url = f"{url_parts.scheme}://{url_parts.netloc}"

Expand Down Expand Up @@ -250,8 +254,11 @@ def login(self, username: str, password: str, pori_demo: bool = False) -> None:
connect_timeout = 7
read_timeout = 61

# KBDEV-1328. Alt. GraphKB login for GSC's PORI online demo
if pori_demo or "pori-demo" in self.url:
if not self.url:
raise ValueError("no GraphKBConnection url set - cannot login")
elif pori_demo or "pori-demo" in self.url:
# KBDEV-1328. Alt. GraphKB login for GSC's PORI online demo
logger.warning("login demo")
self.login_demo()

# use requests package directly to avoid recursion loop on login failure
Expand Down
63 changes: 29 additions & 34 deletions pori_python/ipr/annotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def annotate_expression_variants(
disease_matches: List[str],
variants: List[IprExprVariant],
show_progress: bool = False,
) -> List[KbMatch]:
) -> List[Hashabledict]:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do these need to be hashable? We're losing all the structure of KbMatch.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hashabledict is used for dropping duplicates from a list (convert to set).

The function seems to be used in exactly one place and immediately converted to a hashable type.

Ideally, I would have liked to just make KbMatch a hashable dict type, but I couldn't figure that out here. If someone can figure that out, I think that is the appropriate solution.

I was just trying to find something that gave the appropriate mypy warnings and didn't create errors from any of the KbMatch duplicates.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think probably best to preserve KbMatch structure; will take a crack at making kbmatch a hashable dict type.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think probably best to preserve KbMatch structure; will take a crack at making kbmatch a hashable dict type.

Hi @elewis2, are you having any luck with this?

I think we will want to start with these changes regardless and add extra typing details as we can.
It's more important that the parts where structure is given are accurate than the fact that it is lost at some point. At least this way the mypy warnings are accurate.

The other way to keep the KbMatch structure would be to refactor to eliminate the Hashabledict altogether. If I understand correctly, it was only used to drop duplicates. I think you could eliminate the Hashabledict by creating functions that can take lists of kbMatches and drop duplicates.

"""Annotate expression variants with GraphKB in the IPR alterations format.

Args:
Expand All @@ -113,10 +113,10 @@ def annotate_expression_variants(
show_progress (bool): Progressbar displayed for long runs.

Returns:
list of kbMatches records for IPR
list of Hashabledict records for IPR
"""
skipped = 0
alterations = []
alterations: List[Hashabledict] = []
problem_genes = set()
logger.info(f"Starting annotation of {len(variants)} expression category_variants")
iterfunc = tqdm if show_progress else iter
Expand All @@ -133,7 +133,7 @@ def annotate_expression_variants(
for ipr_row in get_ipr_statements_from_variants(graphkb_conn, matches, disease_matches):
ipr_row["variant"] = row["key"]
ipr_row["variantType"] = row.get("variantType", "exp")
alterations.append(ipr_row)
alterations.append(Hashabledict(ipr_row))
except FeatureNotFoundError as err:
problem_genes.add(gene)
logger.debug(f"Unrecognized gene ({gene} {variant}): {err}")
Expand All @@ -156,7 +156,7 @@ def annotate_copy_variants(
disease_matches: List[str],
variants: List[IprCopyVariant],
show_progress: bool = False,
) -> List[KbMatch]:
) -> List[Hashabledict]:
"""Annotate allowed copy variants with GraphKB in the IPR alterations format.

Args:
Expand All @@ -169,7 +169,7 @@ def annotate_copy_variants(
list of kbMatches records for IPR
"""
skipped = 0
alterations = []
alterations: List[Hashabledict] = []
problem_genes = set()

logger.info(f"Starting annotation of {len(variants)} copy category_variants")
Expand All @@ -188,7 +188,7 @@ def annotate_copy_variants(
for ipr_row in get_ipr_statements_from_variants(graphkb_conn, matches, disease_matches):
ipr_row["variant"] = row["key"]
ipr_row["variantType"] = row.get("variantType", "cnv")
alterations.append(ipr_row)
alterations.append(Hashabledict(ipr_row))
except FeatureNotFoundError as err:
problem_genes.add(gene)
logger.debug(f"Unrecognized gene ({gene} {variant}): {err}")
Expand All @@ -213,7 +213,7 @@ def annotate_positional_variants(
variants: Sequence[IprStructuralVariant] | Sequence[Hashabledict],
disease_matches: List[str],
show_progress: bool = False,
) -> List[Hashabledict]:
) -> Sequence[Hashabledict]:
"""Annotate SNP, INDEL or fusion variant calls with GraphKB and return in IPR match format.

Hashable type is required to turn lists into sets.
Expand All @@ -239,10 +239,11 @@ def annotate_positional_variants(
continue

for var_key in VARIANT_KEYS:
variant = row.get(var_key)
variant = row.get(var_key, "")
matches = []
if not variant or isnull(variant):
continue
variant = str(variant)
try:
try:
matches = gkb_match.match_positional_variant(graphkb_conn, variant)
Expand Down Expand Up @@ -277,15 +278,15 @@ def annotate_positional_variants(
except FeatureNotFoundError as err:
logger.debug(f"failed to match positional variants ({variant}): {err}")
errors += 1
if "gene" in row:
problem_genes.add(row["gene"])
elif "gene1" in row and f"({row['gene1']})" in str(err):
problem_genes.add(row["gene1"])
elif "gene2" in row and f"({row['gene2']})" in str(err):
problem_genes.add(row["gene2"])
elif "gene1" in row and "gene2" in row:
problem_genes.add(row["gene1"])
problem_genes.add(row["gene2"])
if row.get("gene"):
problem_genes.add(row["gene"]) # type: ignore
elif row.get("gene1") and f"({row['gene1']})" in str(err): # type: ignore
problem_genes.add(row["gene1"]) # type: ignore
elif row.get("gene2") and f"({row['gene2']})" in str(err): # type: ignore
problem_genes.add(row["gene2"]) # type: ignore
elif row.get("gene1") and row.get("gene2"): # type: ignore
problem_genes.add(row["gene1"]) # type: ignore
problem_genes.add(row["gene2"]) # type: ignore
else:
raise err
except HTTPError as err:
Expand Down Expand Up @@ -314,7 +315,7 @@ def annotate_signature_variants(
disease_matches: List[str],
variants: List[IprSignatureVariant] = [],
show_progress: bool = False,
) -> List[KbMatch]:
) -> List[Hashabledict]:
"""Annotate Signature variants with GraphKB in the IPR alterations format.

Match to corresponding GraphKB Variants, then to linked GraphKB Statements
Expand Down Expand Up @@ -433,27 +434,21 @@ def annotate_variants(
# MATCHING COPY VARIANTS
logger.info(f"annotating {len(copy_variants)} copy variants")
gkb_matches.extend(
[
Hashabledict(copy_var)
for copy_var in annotate_copy_variants(
graphkb_conn, disease_matches, copy_variants, show_progress=interactive
)
]
annotate_copy_variants(
graphkb_conn, disease_matches, copy_variants, show_progress=interactive
)
)
logger.debug(f"\tgkb_matches: {len(gkb_matches)}")

# MATCHING EXPRESSION VARIANTS
logger.info(f"annotating {len(expression_variants)} expression variants")
gkb_matches.extend(
[
Hashabledict(exp_var)
for exp_var in annotate_expression_variants(
graphkb_conn,
disease_matches,
expression_variants,
show_progress=interactive,
)
]
annotate_expression_variants(
graphkb_conn,
disease_matches,
expression_variants,
show_progress=interactive,
)
)
logger.debug(f"\tgkb_matches: {len(gkb_matches)}")

Expand Down
2 changes: 1 addition & 1 deletion pori_python/ipr/connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def __init__(
self,
username: str,
password: str,
url: str = os.environ.get("IPR_URL"),
url: str = os.environ.get("IPR_URL", ""),
):
self.token = None
self.url = url
Expand Down
39 changes: 21 additions & 18 deletions pori_python/ipr/inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import re
from Bio.Data.IUPACData import protein_letters_3to1
from numpy import nan
from typing import Any, Callable, Dict, Iterable, List, Set, Tuple, cast
from typing import Any, Callable, Dict, Iterable, List, Sequence, Set, Tuple, cast

from pori_python.graphkb.match import INPUT_COPY_CATEGORIES, INPUT_EXPRESSION_CATEGORIES
from pori_python.types import (
Expand All @@ -26,8 +26,8 @@
from .constants import (
COSMIC_SIGNATURE_VARIANT_TYPE,
HLA_SIGNATURE_VARIANT_TYPE,
MSI_MAPPING,
HRD_MAPPING,
MSI_MAPPING,
TMB_SIGNATURE,
TMB_SIGNATURE_VARIANT_TYPE,
)
Expand Down Expand Up @@ -248,20 +248,23 @@ def row_key(row: Dict) -> Tuple[str, ...]:
row["cnvState"] = display_name_mapping[kb_cat]
row["variant"] = kb_cat
row["variantType"] = "cnv"
chrband = row.get("chromosomeBand", False)
chrom = row.pop("chromosome", False)
if not chrom:
chrom = row.pop("chr", False)
# remove chr if it was not used for chrom
row.pop("chr", False)
if chrom:

# Find chromosome and remove chromosome values
chrom = ""
if "chromosome" in row:
chrom = str(row.pop("chromosome", "")) or chrom # type: ignore
if "chr" in row:
chrom = str(row.pop("chr", "")) or chrom # type: ignore

# Include chromosome in chromosomeBand
chrband = row.get("chromosomeBand", "")

if chrom and chrband:
# check that chr isn't already in the chrband;
# this regex from https://vrs.ga4gh.org/en/1.2/terms_and_model.html#id25
if chrband and (re.match(r"^cen|[pq](ter|([1-9][0-9]*(\.[1-9][0-9]*)?))$", chrband)):
if isinstance(chrom, int):
chrom = str(chrom)
if re.match(r"^cen|[pq](ter|([1-9][0-9]*(\.[1-9][0-9]*)?))$", chrband):
chrom = chrom.strip("chr")
row["chromosomeBand"] = chrom + row["chromosomeBand"]
row["chromosomeBand"] = chrom + chrband

return ret_list

Expand Down Expand Up @@ -441,7 +444,7 @@ def row_key(row: Dict) -> Tuple[str, ...]:
return result


def preprocess_cosmic(rows: Iterable[Dict]) -> Iterable[Dict]:
def preprocess_cosmic(rows: Iterable[Dict]) -> Sequence[Dict]:
"""
Process cosmic inputs into preformatted signature inputs
Note: Cosmic and dMMR already evaluated against thresholds in gsc_report
Expand All @@ -456,7 +459,7 @@ def preprocess_cosmic(rows: Iterable[Dict]) -> Iterable[Dict]:
]


def preprocess_hla(rows: Iterable[Dict]) -> Iterable[Dict]:
def preprocess_hla(rows: Iterable[Dict]) -> Sequence[Dict]:
"""
Process hla inputs into preformatted signature inputs
"""
Expand All @@ -480,7 +483,7 @@ def preprocess_hla(rows: Iterable[Dict]) -> Iterable[Dict]:

def preprocess_tmb(
tmb_high: float, tmburMutationBurden: Dict = {}, genomeTmb: float | str = ""
) -> Iterable[Dict]:
) -> Sequence[Dict]:
"""
Process tumour mutation burden (tmb) input(s) into preformatted signature input.
Get compared to threshold; signature CategoryVariant created only if threshold met.
Expand Down Expand Up @@ -530,7 +533,7 @@ def preprocess_tmb(
return []


def preprocess_msi(msi: Any) -> Iterable[Dict]:
def preprocess_msi(msi: Any) -> Sequence[Dict]:
"""
Process micro-satellite input into preformatted signature input.
Both msi & mss gets mapped to corresponding GraphKB Signature CategoryVariants.
Expand All @@ -557,7 +560,7 @@ def preprocess_msi(msi: Any) -> Iterable[Dict]:
return []


def preprocess_hrd(hrd: Any) -> Iterable[Dict]:
def preprocess_hrd(hrd: Any) -> Sequence[Dict]:
"""
Process hrd input into preformatted signature input.
HRD gets mapped to corresponding GraphKB Signature CategoryVariants.
Expand Down
Loading