From 38ff7a9d3c70cf2e6e6ec7fb0b8bfb3c760e17f7 Mon Sep 17 00:00:00 2001 From: Ankush Chander Date: Mon, 4 Nov 2024 11:58:22 +0530 Subject: [PATCH 1/2] fix information loss in undirected graph --- pytextrank/base.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pytextrank/base.py b/pytextrank/base.py index bbd6185..90433e1 100644 --- a/pytextrank/base.py +++ b/pytextrank/base.py @@ -498,7 +498,7 @@ def edge_list ( returns: list of weighted edges """ - edges: typing.List[typing.Tuple[Lemma, Lemma]] = [] + edges: typing.List[typing.Tuple[Lemma, ...]] = [] for sent in self.doc.sents: h = [ @@ -510,11 +510,12 @@ def edge_list ( for hop in range(self.token_lookback): for idx, node in enumerate(h[: -1 - hop]): nbor = h[hop + idx + 1] - edges.append((node, nbor)) + sorted_edge = tuple(sorted([node, nbor], key=lambda x: x.lemma)) + edges.append(sorted_edge) # include weight on the edge: (2, 3, {'weight': 3.1415}) weighted_edges: typing.List[typing.Tuple[Lemma, Lemma, typing.Dict[str, float]]] = [ - (*n, {"weight": w * self.edge_weight}) for n, w in Counter(edges).items() + (node1, node2, {"weight": w * self.edge_weight}) for (node1,node2), w in Counter(edges).items() ] return weighted_edges From 41df2519a2f766eba209d5ebafe50c923baf66b3 Mon Sep 17 00:00:00 2001 From: Ankush Chander Date: Mon, 4 Nov 2024 11:58:37 +0530 Subject: [PATCH 2/2] fix information loss in undirected graph --- pytextrank/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytextrank/base.py b/pytextrank/base.py index 90433e1..ed92ab9 100644 --- a/pytextrank/base.py +++ b/pytextrank/base.py @@ -515,7 +515,7 @@ def edge_list ( # include weight on the edge: (2, 3, {'weight': 3.1415}) weighted_edges: typing.List[typing.Tuple[Lemma, Lemma, typing.Dict[str, float]]] = [ - (node1, node2, {"weight": w * self.edge_weight}) for (node1,node2), w in Counter(edges).items() + (node1, node2, {"weight": w * self.edge_weight}) for (node1, node2), w in Counter(edges).items() ] return weighted_edges