import os
import sys
import pandas as pd
import numpy as np
import ast
import networkx as nx
import ckg_utils
import config.ckg_config as ckg_config
import dash_cytoscape as cyto
from graphdb_connector import connector
from report_manager import report as rp
from analytics_core import utils
from analytics_core.viz import viz, color_list
from networkx.readwrite import json_graph
# Logging configuration for the report manager, read from the CKG config module.
log_config = ckg_config.report_manager_log
# Module-level logger, set up from that configuration under the "knowledge" key.
logger = ckg_utils.setup_logging(log_config, key="knowledge")
# Runs at import time; presumably registers dash_cytoscape's additional layouts
# so network plots can use them -- confirm against the dash_cytoscape docs.
cyto.load_extra_layouts()
class Knowledge:
    """Collect nodes and relationships and render them as a knowledge graph.

    Nodes are stored as ``{name: attributes}`` and relationships as
    ``{(source, target): attributes}``. Both are converted into a
    ``networkx.DiGraph`` on demand and can be plotted as a network or as a
    sankey diagram.
    """

    # Palette used when the caller does not supply ``colors``.
    _DEFAULT_COLORS = {'Protein': '#1a9850',
                       'Clinical_variable': '#542788',
                       'Drug': '#c51b7d',
                       'Tissue': '#66c2a5',
                       'Disease': '#b2182b',
                       'Pathway': '#762a83',
                       'Publication': '#b35806',
                       'Biological_process': '#e6f598',
                       'Symptom': '#f46d43',
                       'Project': '#3288bd',
                       'Complex': '#31a354',
                       'upregulated': '#d53e4f',
                       'downregulated': '#3288bd'
                       }

    def __init__(self, identifier, data, nodes=None, relationships=None, queries_file=None, colors=None, graph=None, report=None):
        """Create a knowledge container.

        :param identifier: identifier used in plot titles and element ids.
        :param data: mapping of result name to result (may be ``None``).
        :param nodes: initial ``{name: attributes}`` mapping; a fresh dict is
            created when omitted. A dict passed in is used as-is (not copied).
        :param relationships: initial ``{(source, target): attributes}``
            mapping; a fresh dict is created when omitted.
        :param queries_file: path of the Cypher queries file, relative to this
            module's directory.
        :param colors: ``{entity: hex colour}``; the default palette is used
            when omitted or empty.
        :param graph: pre-built graph, or ``None`` to build it lazily.
        :param report: initial report object; an empty dict when omitted.
        """
        self._identifier = identifier
        self._data = data
        # Initialised so the ``entities`` property never raises AttributeError.
        self._entities = None
        # ``None`` sentinels instead of ``{}`` defaults: the dicts are mutated
        # in place later, so a shared default would leak between instances.
        self._nodes = {} if nodes is None else nodes
        self._relationships = {} if relationships is None else relationships
        self._queries_file = queries_file
        self._graph = graph
        self._report = {} if report is None else report
        self._default_color = '#636363'
        self._colors = colors if colors else dict(self._DEFAULT_COLORS)

    @property
    def identifier(self):
        """Identifier of this knowledge object."""
        return self._identifier

    @identifier.setter
    def identifier(self, identifier):
        self._identifier = identifier

    @property
    def data(self):
        """Mapping of result name to result used to generate knowledge."""
        return self._data

    @data.setter
    def data(self, data):
        self._data = data

    @property
    def entities(self):
        """Entities attached to this object (``None`` until set)."""
        return self._entities

    @entities.setter
    def entities(self, entities):
        self._entities = entities

    @property
    def nodes(self):
        """``{name: attributes}`` mapping of graph nodes."""
        return self._nodes

    @nodes.setter
    def nodes(self, nodes):
        self._nodes = nodes

    def update_nodes(self, nodes):
        """Merge ``nodes`` into the stored nodes (existing names are overwritten)."""
        self._nodes.update(nodes)

    @property
    def relationships(self):
        """``{(source, target): attributes}`` mapping of graph edges."""
        return self._relationships

    @relationships.setter
    def relationships(self, relationships):
        self._relationships = relationships

    def update_relationships(self, relationships):
        """Merge ``relationships`` into the stored ones (existing pairs are overwritten)."""
        self._relationships.update(relationships)

    @property
    def queries_file(self):
        """Path of the Cypher queries file, relative to this module."""
        return self._queries_file

    @queries_file.setter
    def queries_file(self, queries_file):
        self._queries_file = queries_file

    @property
    def colors(self):
        """``{entity: hex colour}`` palette."""
        return self._colors

    @colors.setter
    def colors(self, colors):
        self._colors = colors

    @property
    def default_color(self):
        """Colour used for entities missing from the palette."""
        return self._default_color

    @default_color.setter
    def default_color(self, default_color):
        self._default_color = default_color

    @property
    def report(self):
        """Report produced by :meth:`generate_report`."""
        return self._report

    @report.setter
    def report(self, report):
        self._report = report

    @property
    def graph(self):
        """Graph built by :meth:`generate_knowledge_graph`, or ``None``."""
        return self._graph

    @graph.setter
    def graph(self, graph):
        self._graph = graph

    def _color_of(self, entity):
        """Return the palette colour of ``entity``, or the default colour."""
        return self.colors[entity] if entity in self.colors else self.default_color

    def _has_data(self, key):
        """Return True when ``data`` is set and contains ``key``."""
        return self.data is not None and key in self.data

    def generate_knowledge_from_regulation(self, entity):
        """Create one node per entry of ``data['regulated']``.

        :param entity: entity type assigned to every node.
        :return: ``(nodes, relationships)``; relationships is always empty,
            the link to the 'Regulated' group is stored as each node's
            ``parent`` attribute.
        """
        nodes = {}
        relationships = {}
        if self._has_data('regulated'):
            color = self._color_of(entity)
            for name in self.data['regulated']:
                nodes[name] = {'type': entity, 'color': color, 'parent': 'Regulated'}
        return nodes, relationships

    def genreate_knowledge_from_correlation(self, entity_node1, entity_node2, filter, cutoff=0.5):
        """Create nodes and 'correlates' edges from ``data['correlation_correlation']``.

        The misspelled method name is kept because callers use it.

        :param entity_node1: entity type of the ``node1`` column.
        :param entity_node2: entity type of the ``node2`` column.
        :param filter: collection of node names; when non-empty, only rows
            whose two nodes are both in it are used. ``None`` or empty
            disables filtering.
        :param cutoff: minimum absolute weight for a row to be kept.
        :return: ``(nodes, relationships)``.
        """
        nodes = {}
        relationships = {}
        if not self._has_data('correlation_correlation'):
            return nodes, relationships

        node1_color = self._color_of(entity_node1)
        node2_color = self._color_of(entity_node2)
        # Build the lookup once; ``None`` means "do not filter".
        allowed = set(filter) if filter is not None and len(filter) > 0 else None
        for _, row in self.data['correlation_correlation'].iterrows():
            source, target, weight = row['node1'], row['node2'], row['weight']
            if allowed is not None and (source not in allowed or target not in allowed):
                continue
            if np.abs(weight) >= cutoff:
                nodes.update({source: {'type': entity_node1, 'color': node1_color},
                              target: {'type': entity_node2, 'color': node2_color}})
                relationships[(source, target)] = {'type': 'correlates',
                                                   'weight': weight,
                                                   'width': np.abs(weight),
                                                   'source_color': node1_color,
                                                   'target_color': node2_color}
        return nodes, relationships

    def generate_knowledge_from_wgcna(self, data, entity1, entity2, cutoff=0.2):
        """Create module, feature and trait nodes from a WGCNA result.

        :param data: mapping that may contain ``'features_per_module'``
            (rows with ``modColor`` and ``name``) and ``'module_trait_cor'``
            (module x trait correlation table).
        :param entity1: entity type of the traits.
        :param entity2: entity type of the module features.
        :param cutoff: minimum absolute module-trait correlation to keep.
        :return: ``(nodes, relationships)``.
        """
        nodes = {}
        relationships = {}
        color_dict = color_list.make_color_dict()
        node1_color = self._color_of(entity1)
        node2_color = self._color_of(entity2)
        if 'features_per_module' in data:
            for _, row in data['features_per_module'].iterrows():
                module = "ME" + row['modColor']
                module_color = color_dict[row['modColor']]
                nodes.update({module: {'type': 'Module', 'color': module_color, 'parent': 'Regulated'},
                              row['name']: {'type': entity2, 'color': node2_color, 'parent': module}})
                relationships[('Regulated', module)] = {'type': '', 'weight': 5,
                                                        'source_color': self.default_color,
                                                        'target_color': module_color}
                relationships[(module, row['name'])] = {'type': 'CONTAINS', 'weight': 5,
                                                        'source_color': module_color,
                                                        'target_color': node2_color}
        if 'module_trait_cor' in data and data['module_trait_cor'] is not None:
            correlations = data['module_trait_cor']
            # ``Index.is_numeric()`` is deprecated in pandas 2.x; the dtype
            # check keeps the same "module names live in the index" test.
            if not pd.api.types.is_numeric_dtype(correlations.index):
                correlations = correlations.reset_index()
            # Long format: one (module, trait) -> correlation entry per cell.
            stacked = correlations.set_index('index').stack()
            for (module, trait), value in stacked.items():
                if np.abs(value) >= cutoff:
                    # Strip only the leading "ME" prefix to recover the colour name.
                    color_name = module[2:] if module.startswith('ME') else module
                    nodes[trait] = {'type': entity1, 'color': node1_color}
                    relationships[(module, trait)] = {'type': 'correlates',
                                                      'weight': value,
                                                      # Width is a magnitude, as in the correlation
                                                      # generator; the sign stays in 'weight'.
                                                      'width': np.abs(value),
                                                      'source_color': color_dict[color_name],
                                                      'target_color': node1_color}
        return nodes, relationships

    def generate_knowledge_from_edgelist(self, edgelist, entity1, entity2, source, target, rtype, weight):
        """Add the nodes and edges of an edge list directly to this object.

        Unlike the other generators this one stores the result instead of
        returning it.

        :param edgelist: DataFrame with one edge per row.
        :param entity1: entity type of the source nodes.
        :param entity2: entity type of the target nodes.
        :param source: name of the source column.
        :param target: name of the target column.
        :param rtype: relationship type assigned to every edge.
        :param weight: name of the weight column.
        """
        nodes = {}
        relationships = {}
        node1_color = self._color_of(entity1)
        node2_color = self._color_of(entity2)
        for _, row in edgelist.iterrows():
            nodes.update({row[source]: {'type': entity1, 'color': node1_color},
                          row[target]: {'type': entity2, 'color': node2_color}})
            relationships[(row[source], row[target])] = {'type': rtype,
                                                         'source_color': node1_color,
                                                         'target_color': node2_color,
                                                         'weight': row[weight]}
        self.update_nodes(nodes)
        self.update_relationships(relationships)

    def generate_knowledge_from_annotations(self, entity1, entity2, filter=None):
        """Create 'is_annotated' edges from ``data['<entity2>_annotation']``.

        :param entity1: entity type of the ``identifier`` column.
        :param entity2: entity type of the ``annotation`` column; its
            lower-cased name selects the data key.
        :param filter: collection of names; when non-empty, only rows whose
            identifier and annotation are both in it are used. ``None``
            (the default) or empty disables filtering.
        :return: ``(nodes, relationships)``.
        """
        nodes = {}
        relationships = {}
        data_key = entity2.lower() + '_annotation'
        if not self._has_data(data_key):
            return nodes, relationships

        node1_color = self._color_of(entity1)
        node2_color = self._color_of(entity2)
        # The default ``None`` used to hit ``len(None)``; treat it as "no filter".
        allowed = set(filter) if filter is not None and len(filter) > 0 else None
        for _, row in self.data[data_key].iterrows():
            identifier, annotation = row['identifier'], row['annotation']
            if allowed is not None and (identifier not in allowed or annotation not in allowed):
                continue
            nodes.update({identifier: {'type': entity1, 'color': node1_color},
                          annotation: {'type': entity2, 'color': node2_color}})
            relationships[(identifier, annotation)] = {'type': 'is_annotated',
                                                       'source_color': node1_color,
                                                       'target_color': node2_color}
        return nodes, relationships

    def generate_knowledge_from_similarity(self, entity='Project'):
        """Create 'is_similar' edges from ``data['similar_projects']``.

        :param entity: entity type assigned to the similar nodes.
        :return: ``(nodes, relationships)``; only the ``other`` side of each
            row is added as a node.
        """
        nodes = {}
        relationships = {}
        if self._has_data('similar_projects'):
            node_color = self._color_of(entity)
            similar_projects = pd.DataFrame.from_dict(self.data['similar_projects'])
            for _, row in similar_projects.iterrows():
                nodes[row['other']] = {'type': entity, 'color': node_color}
                relationships[(row['current'], row['other'])] = {'type': 'is_similar',
                                                                 'weight': row['similarity_pearson'],
                                                                 'width': row['similarity_pearson'],
                                                                 'source_color': node_color,
                                                                 'target_color': node_color}
        return nodes, relationships

    def generate_knowledge_from_queries(self, entity, queries_results):
        """Create nodes and edges from database query results.

        Every key of ``queries_results`` becomes a 'Group' node; each result
        row links ``node1`` to ``node2`` and ``node2`` to its group.

        :param entity: entity type of the ``node1`` column.
        :param queries_results: ``{group name: DataFrame}`` with ``node1`` and
            ``node2`` columns and optional ``type`` and ``weight`` columns.
            ``None`` results are skipped.
        :return: ``(nodes, relationships)``.
        """
        nodes = {}
        relationships = {}
        node1_color = self._color_of(entity)
        for node2 in queries_results:
            node2_color = self._color_of(node2)
            nodes[node2] = {'color': node2_color, 'type': 'Group'}
            result = queries_results[node2]
            if result is None:
                # A query that returned nothing still keeps its group node.
                continue
            for _, row in result.iterrows():
                rel_type = row['type'] if 'type' in row else 'associated'
                weight = row['weight'] if 'weight' in row else 5
                # Quotes are dropped so the name matches the plot selector.
                target = row['node2'].replace("'", "").title()
                nodes.update({row['node1']: {'type': entity, 'color': node1_color},
                              target: {'type': node2, 'color': node2_color, 'parent': node2}})
                relationships[(row['node1'], target)] = {'type': rel_type, 'weight': weight, 'width': weight,
                                                         'source_color': node1_color,
                                                         'target_color': node2_color}
                relationships[(target, node2)] = {'type': 'is_a', 'weight': 5, 'width': 5,
                                                  'source_color': node2_color,
                                                  'target_color': node2_color}
        return nodes, relationships

    def send_query(self, query):
        """Run ``query`` against the graph database and return the result."""
        driver = connector.getGraphDatabaseConnectionConfiguration()
        return connector.getCursorData(driver, query)

    def query_data(self, replace):
        """Run every 'knowledge_report' query of the queries file.

        :param replace: iterable of ``(placeholder, value)`` pairs substituted
            into each query text; values are converted to ``str``.
        :return: ``{query name: result}``. Errors are logged, not raised, so
            the mapping may be partial or empty.
        """
        query_data = {}
        try:
            cwd = os.path.abspath(os.path.dirname(__file__))
            cypher_queries = ckg_utils.get_queries(os.path.join(cwd, self.queries_file))
            if cypher_queries is not None:
                for query_name in cypher_queries:
                    spec = cypher_queries[query_name]
                    if 'query_type' in spec and spec['query_type'] == 'knowledge_report':
                        query = spec['query']
                        for placeholder, value in replace:
                            # ``str.replace`` rejects non-string values (e.g. a numeric id).
                            query = query.replace(placeholder, str(value))
                        query_data[query_name] = self.send_query(query)
        except Exception as err:
            # Best effort: report the failure and return what was collected.
            exc_type, exc_obj, exc_tb = sys.exc_info()
            fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
            logger.error("Reading queries from file {}: {}, file: {},line: {}, err: {}".format(self.queries_file, sys.exc_info(), fname, exc_tb.tb_lineno, err))
        return query_data

    def generate_cypher_nodes_list(self):
        """Return the node names as a comma-separated list of quoted strings.

        Backslashes and double quotes are escaped so a name cannot terminate
        the string literal it is embedded in.
        """
        escaped = (str(name).replace('\\', '\\\\').replace('"', '\\"') for name in self.nodes)
        return ",".join('"{}"'.format(name) for name in escaped)

    def generate_knowledge_graph(self):
        """Build a directed graph from the nodes and relationships and store it."""
        G = nx.DiGraph()
        G.add_nodes_from(self.nodes.items())
        G.add_edges_from(self.relationships.keys())
        nx.set_edge_attributes(G, self.relationships)
        self.graph = G

    def reduce_to_subgraph(self, nodes):
        """Keep only ``nodes``, 'Regulated' and their direct neighbours.

        The graph is rebuilt first; afterwards ``nodes`` and
        ``relationships`` are replaced by the content of the reduced graph.

        :param nodes: names of the nodes to keep.
        """
        valid_nodes = set(nodes).intersection(self.nodes.keys())
        valid_nodes.add("Regulated")
        self.generate_knowledge_graph()
        neighbours = set()
        for name in valid_nodes:
            if name in self.nodes:
                for source, target in self.graph.out_edges(name):
                    neighbours.update((source, target))
                for source, target in self.graph.in_edges(name):
                    neighbours.update((source, target))
        # Only names known in ``self.nodes`` are candidates for removal.
        remove = set(self.nodes.keys()).difference(neighbours.union(valid_nodes))
        self.graph.remove_nodes_from(list(remove))
        self.nodes = dict(self.graph.nodes(data=True))
        self.relationships = {(a, b): c for a, b, c in self.graph.edges(data=True)}

    def _node_style(self, name, attributes):
        """Return the cytoscape stylesheet entry for one node."""
        # Nodes taken back from the graph may carry no attributes at all.
        attributes = attributes or {}
        entity_type = attributes.get('type')
        style = {'font-size': 10,
                 'background-color': attributes.get('color', self.default_color),
                 'width': 50,
                 'height': 50,
                 'background-fit': 'cover',
                 'opacity': 0.3 if entity_type in ('Module', 'Group') else 1}
        if entity_type is not None:
            style['background-image'] = '/assets/graph_icons/{}.png'.format(entity_type)
        selector = '[name = "{}"]'.format(str(name).replace("'", ""))
        return {'selector': selector, 'style': style}

    def get_knowledge_graph_plot(self):
        """Build the network representation of the graph.

        The graph is generated first if it does not exist yet.

        :return: dict with keys ``notebook`` (elements, stylesheet, layout),
            ``app`` (cytoscape component), ``net_tables`` (nodes and edges
            tables) and ``net_json`` (node-link data).
        """
        if self.graph is None:
            self.generate_knowledge_graph()

        title = 'Project {} Knowledge Graph'.format(self.identifier)
        if self.data is not None and 'name' in self.data:
            title = 'Project {} Knowledge Graph'.format(self.data['name'])

        args = {'title': title,
                'node_properties': {},
                'width': 2600,
                'height': 2600,
                'maxLinkWidth': 7,
                'maxRadius': 20}
        stylesheet = [{'selector': 'node', 'style': {'label': 'data(name)', 'z-index': 9999}},
                      {'selector': 'edge', 'style': {'label': 'data(type)',
                                                     'curve-style': 'unbundled-bezier',
                                                     'control-point-distance': '20px',
                                                     'control-point-weight': '0.7',
                                                     'z-index': 5000,
                                                     'line-color': '#bdbdbd',
                                                     'opacity': 0.2,
                                                     'font-size': '7px'}}]
        layout = {'name': 'circle'}
        # Styles are built as dicts; the former string template evaluated with
        # ``ast.literal_eval`` broke on names containing its placeholders.
        stylesheet.extend(self._node_style(name, attributes) for name, attributes in self.nodes.items())
        # NOTE(review): the original negative-weight colour was '#4add1', which
        # is not a valid hex colour. '#4393c3' is assumed here as the blue
        # counterpart of '#d6604d' -- confirm the intended value.
        stylesheet.extend([{'selector': '[weight < 0]', 'style': {'line-color': '#4393c3'}},
                           {'selector': '[weight > 0]', 'style': {'line-color': '#d6604d'}}])
        args['stylesheet'] = stylesheet
        args['layout'] = layout

        nodes_table, edges_table = viz.network_to_tables(self.graph)
        nodes_fig_table = viz.get_table(nodes_table, identifier="{}_nodes_table".format(self.identifier), title="Nodes table")
        edges_fig_table = viz.get_table(edges_table, identifier="{}_edges_table".format(self.identifier), title="Edges table")
        cy_elements, _ = utils.networkx_to_cytoscape(self.graph)
        net = {"notebook": [cy_elements, stylesheet, layout],
               "app": viz.get_cytoscape_network(cy_elements, self.identifier, args),
               "net_tables": (nodes_fig_table, edges_fig_table),
               "net_json": json_graph.node_link_data(self.graph)}
        return net

    def generate_report(self, visualization='sankey'):
        """Create the knowledge report and store it in ``self.report``.

        :param visualization: ``'network'`` or ``'sankey'``.
        :raises ValueError: if ``visualization`` is not one of the two
            supported values.
        """
        if visualization not in ('network', 'sankey'):
            # Previously this fell through and failed on an unbound ``plots``.
            raise ValueError("Unknown visualization '{}': expected 'network' or 'sankey'".format(visualization))

        report = rp.Report(identifier="knowledge")
        if visualization == 'network':
            plots = [self.get_knowledge_graph_plot()]
        else:
            if self.graph is None:
                self.generate_knowledge_graph()
            # Edges without some attribute get 1 so every row can be drawn.
            df = nx.to_pandas_edgelist(self.graph).fillna(1)
            plots = [viz.get_sankey_plot(df, self.identifier, args={'source': 'source',
                                                                    'target': 'target',
                                                                    'source_colors': 'source_color',
                                                                    'target_colors': 'target_color',
                                                                    'hover': 'type',
                                                                    'pad': 10,
                                                                    'weight': 'weight',
                                                                    'orientation': 'h',
                                                                    'valueformat': '.0f',
                                                                    'width': 1600,
                                                                    'height': 2200,
                                                                    'font': 10,
                                                                    'title': 'Knowledge Graph'})]
        report.plots = {("Knowledge Graph", "Knowledge Graph"): plots}
        self.report = report

    def save_report(self, directory):
        """Save the report under ``<directory>/Knowledge``, creating it if needed.

        :param directory: parent directory of the ``Knowledge`` folder.
        """
        target = os.path.join(directory, "Knowledge")
        # ``exist_ok`` avoids the check-then-create race of ``os.path.exists``.
        os.makedirs(target, exist_ok=True)
        self.report.save_report(directory=target)
class ProjectKnowledge(Knowledge):
    """Knowledge built from project similarity and project-level queries."""

    def __init__(self, identifier, data, nodes=None, relationships=None, colors=None, graph=None, report=None):
        """Create project knowledge; see :class:`Knowledge` for the parameters.

        ``None`` defaults are turned into fresh containers here, because
        :meth:`generate_knowledge` mutates ``nodes`` and ``relationships`` in
        place and a shared default dict would leak between instances.
        """
        queries_file = 'queries/project_knowledge_cypher.yml'
        Knowledge.__init__(self, identifier, data=data,
                           nodes={} if nodes is None else nodes,
                           relationships={} if relationships is None else relationships,
                           queries_file=queries_file,
                           colors={} if colors is None else colors,
                           graph=graph,
                           report={} if report is None else report)

    def generate_knowledge(self):
        """Add similarity and query knowledge to the nodes and relationships.

        Also links the project, taken from ``data['name']``, to the
        'Regulated' group.
        """
        similarity_knowledge = self.generate_knowledge_from_similarity(entity='Project')
        self.nodes.update(similarity_knowledge[0])
        self.relationships.update(similarity_knowledge[1])
        # A custom palette may not define 'Project'; fall back like the base class does.
        project_color = self.colors['Project'] if 'Project' in self.colors else self.default_color
        self.relationships.update({(self.data['name'], 'Regulated'): {'type': 'has', 'weight': 5, 'width': 5, 'source_color': project_color, 'target_color': self.default_color}})
        queries_results = self.query_data(replace=[('PROJECTID', self.identifier)])
        queries_knowledge = self.generate_knowledge_from_queries(entity='Project', queries_results=queries_results)
        self.nodes.update(queries_knowledge[0])
        self.relationships.update(queries_knowledge[1])
class ProteomicsKnowledge(Knowledge):
    """Knowledge built from regulated proteins and protein-level queries."""

    def __init__(self, identifier, data, nodes=None, relationships=None, colors=None, graph=None, report=None):
        """Create proteomics knowledge; see :class:`Knowledge` for the parameters.

        ``None`` defaults are turned into fresh containers so instances never
        share one default dict.
        """
        queries_file = 'queries/proteomics_knowledge_cypher.yml'
        Knowledge.__init__(self, identifier, data=data,
                           nodes={} if nodes is None else nodes,
                           relationships={} if relationships is None else relationships,
                           queries_file=queries_file,
                           colors={} if colors is None else colors,
                           graph=graph,
                           report={} if report is None else report)

    def generate_knowledge(self):
        """Replace nodes and relationships with regulation and query knowledge.

        The regulated proteins become the node set; they are then passed to
        the proteomics queries and the query results are merged in.
        """
        regulation_knowledge = self.generate_knowledge_from_regulation(entity='Protein')
        self.nodes = regulation_knowledge[0]
        self.relationships = regulation_knowledge[1]
        nodes = self.generate_cypher_nodes_list()
        # Count the proteins, not the characters of the joined Cypher list:
        # ``len(nodes)`` exceeded 10 for almost any non-empty list.
        # NOTE(review): assumes the threshold was meant as a node count -- confirm.
        limit_count = 3 if len(self.nodes) > 10 else 1
        queries_results = self.query_data(replace=[('PROTEINIDS', nodes), ('PROJECTID', self.identifier), ('LIMIT_COUNT', str(limit_count))])
        queries_knowledge = self.generate_knowledge_from_queries(entity='Protein', queries_results=queries_results)
        self.nodes.update(queries_knowledge[0])
        self.relationships.update(queries_knowledge[1])
class ClinicalKnowledge(Knowledge):
    """Knowledge built from regulation, correlation and clinical queries."""

    def __init__(self, identifier, data, nodes=None, relationships=None, colors=None, graph=None, report=None):
        """Create clinical knowledge; see :class:`Knowledge` for the parameters.

        ``None`` defaults are turned into fresh containers so instances never
        share one default dict.
        """
        queries_file = 'queries/clinical_knowledge_cypher.yml'
        Knowledge.__init__(self, identifier, data=data,
                           nodes={} if nodes is None else nodes,
                           relationships={} if relationships is None else relationships,
                           queries_file=queries_file,
                           colors={} if colors is None else colors,
                           graph=graph,
                           report={} if report is None else report)

    def generate_knowledge(self):
        """Replace nodes and relationships with regulation, correlation and query knowledge.

        Correlations are restricted to the regulated nodes.
        """
        regulation_knowledge = self.generate_knowledge_from_regulation(entity='Protein')
        correlation_knowledge = self.genreate_knowledge_from_correlation('Protein', 'Protein', filter=regulation_knowledge[0].keys())
        self.nodes = regulation_knowledge[0]
        self.nodes.update(correlation_knowledge[0])
        self.relationships = regulation_knowledge[1]
        self.relationships.update(correlation_knowledge[1])
        nodes = self.generate_cypher_nodes_list()
        # NOTE(review): the PROJECTID placeholder receives the node list here,
        # whereas the sibling classes pass ``self.identifier`` -- verify
        # against the clinical queries file.
        queries_results = self.query_data(replace=[('PROJECTID', nodes)])
        # NOTE(review): 'Clinical' is not a key of the default palette
        # ('Clinical_variable' is), so these nodes get the default colour -- confirm.
        queries_knowledge = self.generate_knowledge_from_queries(entity='Clinical', queries_results=queries_results)
        self.nodes.update(queries_knowledge[0])
        self.relationships.update(queries_knowledge[1])
class MultiOmicsKnowledge(Knowledge):
    """Knowledge built from multi-omics (WGCNA) results."""

    def __init__(self, identifier, data, nodes=None, relationships=None, colors=None, graph=None, report=None):
        """Create multi-omics knowledge; see :class:`Knowledge` for the parameters.

        ``None`` defaults are turned into fresh containers, because
        :meth:`generate_knowledge` mutates ``nodes`` and ``relationships`` in
        place and a shared default dict would leak between instances.
        """
        queries_file = 'queries/multiomics_knowledge_cypher.yml'
        Knowledge.__init__(self, identifier, data=data,
                           nodes={} if nodes is None else nodes,
                           relationships={} if relationships is None else relationships,
                           queries_file=queries_file,
                           colors={} if colors is None else colors,
                           graph=graph,
                           report={} if report is None else report)

    def generate_knowledge(self):
        """Merge the WGCNA knowledge of ``data['wgcna_wgcna']`` into this object.

        Only the ``'wgcna-proteomics'`` entry is handled; other data types
        are skipped because no entity types are defined for them.
        """
        if 'wgcna_wgcna' in self.data:
            for dtype in self.data['wgcna_wgcna']:
                # Entity types are only known for this data type, so all the
                # work stays inside the branch (avoids unbound or stale names).
                if dtype == 'wgcna-proteomics':
                    entity1 = 'Clinical_variable'
                    entity2 = 'Protein'
                    wgcna_knowledge = self.generate_knowledge_from_wgcna(self.data['wgcna_wgcna'][dtype], entity1, entity2)
                    self.nodes.update(wgcna_knowledge[0])
                    self.relationships.update(wgcna_knowledge[1])