# Source code for bio2bel_drugbank.models

# -*- coding: utf-8 -*-

"""Database models for bio2bel_drugbank."""

from typing import Set

from sqlalchemy import Boolean, Column, Date, ForeignKey, Integer, String, Table, Text, and_
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import backref, relationship

from pybel import BELGraph
from pybel.constants import REGULATES
from pybel.dsl import BaseEntity, abundance, activity, protein
from .constants import MODULE_NAME
from .patent_utils import download_google_patents

# Public API of this module: the declarative base, the ORM models, and three
# of the many-to-many association tables.
# NOTE(review): the ``dpi_action`` and ``dpi_article`` association tables
# defined in this module are not listed here - confirm whether that is intentional.
__all__ = [
    'Action',
    'Base',
    'Type',
    'Drug',
    'DrugProteinInteraction',
    'Alias',
    'AtcCode',
    'Group',
    'Category',
    'Patent',
    'DrugXref',
    'Protein',
    'Species',
    'Article',
    'drug_category',
    'drug_group',
    'drug_patent',
]

# Declarative base class that every model in this module inherits from; its
# ``metadata`` also holds the association tables defined in this module.
Base = declarative_base()

# Table names. Each one is prefixed with MODULE_NAME.

# Entity tables
DRUG_TABLE_NAME = f'{MODULE_NAME}_drug'
TYPE_TABLE_NAME = f'{MODULE_NAME}_type'
ALIAS_TABLE_NAME = f'{MODULE_NAME}_alias'
ATC_TABLE_NAME = f'{MODULE_NAME}_atc'
CATEGORY_TABLE_NAME = f'{MODULE_NAME}_category'
GROUP_TABLE_NAME = f'{MODULE_NAME}_group'
# Drug <-> category / group association tables
DRUG_CATEGORY_TABLE_NAME = f'{MODULE_NAME}_drug_category'
DRUG_GROUP_TABLE_NAME = f'{MODULE_NAME}_drug_group'
PATENT_TABLE_NAME = f'{MODULE_NAME}_patent'
# Drug <-> patent association table
DRUG_PATENT_TABLE_NAME = f'{MODULE_NAME}_drug_patent'
DRUG_XREF_TABLE_NAME = f'{MODULE_NAME}_drug_xref'
SPECIES_TABLE_NAME = f'{MODULE_NAME}_species'
PROTEIN_TABLE_NAME = f'{MODULE_NAME}_protein'
ACTION_TABLE_NAME = f'{MODULE_NAME}_action'
# Drug-protein interaction table and its association tables
DRUG_PROTEIN_TABLE_NAME = f'{MODULE_NAME}_drug_protein'
DRUG_PROTEIN_ACTION_TABLE_NAME = f'{MODULE_NAME}_drug_protein_action'
ARTICLE_TABLE_NAME = f'{MODULE_NAME}_article'
DRUG_PROTEIN_ARTICLE_TABLE_NAME = f'{MODULE_NAME}_drug_protein_article'

# Many-to-many association tables. Each row pairs two foreign keys, and the
# pair forms the composite primary key of the table.

# Links drugs to groups.
drug_group = Table(
    DRUG_GROUP_TABLE_NAME,
    Base.metadata,
    Column('drug_id', Integer, ForeignKey(f'{DRUG_TABLE_NAME}.id'), primary_key=True),
    Column('group_id', Integer, ForeignKey(f'{GROUP_TABLE_NAME}.id'), primary_key=True)
)

# Links drugs to categories.
drug_category = Table(
    DRUG_CATEGORY_TABLE_NAME,
    Base.metadata,
    Column('drug_id', Integer, ForeignKey(f'{DRUG_TABLE_NAME}.id'), primary_key=True),
    Column('category_id', Integer, ForeignKey(f'{CATEGORY_TABLE_NAME}.id'), primary_key=True)
)

# Links drugs to patents.
drug_patent = Table(
    DRUG_PATENT_TABLE_NAME,
    Base.metadata,
    Column('drug_id', Integer, ForeignKey(f'{DRUG_TABLE_NAME}.id'), primary_key=True),
    Column('patent_id', Integer, ForeignKey(f'{PATENT_TABLE_NAME}.id'), primary_key=True)
)

# Links drug-protein interactions to actions.
dpi_action = Table(
    DRUG_PROTEIN_ACTION_TABLE_NAME,
    Base.metadata,
    Column('drug_protein_id', Integer, ForeignKey(f'{DRUG_PROTEIN_TABLE_NAME}.id'), primary_key=True),
    Column('action_id', Integer, ForeignKey(f'{ACTION_TABLE_NAME}.id'), primary_key=True)
)

# Links drug-protein interactions to articles.
dpi_article = Table(
    DRUG_PROTEIN_ARTICLE_TABLE_NAME,
    Base.metadata,
    Column('drug_protein_id', Integer, ForeignKey(f'{DRUG_PROTEIN_TABLE_NAME}.id'), primary_key=True),
    Column('article_id', Integer, ForeignKey(f'{ARTICLE_TABLE_NAME}.id'), primary_key=True)
)

# Maps a patent's country name to the prefix placed before the patent
# identifier when building a Google Patents URL. Only these two countries
# are mapped.
PATENT_PREFIX_MAP = {
    'United States': 'US',
    'Canada': 'CA',
}


class Type(Base):
    """The type of a drug - either small molecule or biologic."""

    __tablename__ = TYPE_TABLE_NAME

    # Surrogate primary key
    id = Column(Integer, primary_key=True)

    # Name of the type; required, unique and indexed
    name = Column(String(255), nullable=False, unique=True, index=True)

    def __repr__(self):
        """Return the name of this type."""
        return self.name
class Drug(Base):
    """A drug, with its identifiers and chemical descriptors."""

    __tablename__ = DRUG_TABLE_NAME

    # Surrogate primary key
    id = Column(Integer, primary_key=True)

    # Every drug must reference a row in the type table
    type_id = Column(Integer, ForeignKey(f'{TYPE_TABLE_NAME}.id'), nullable=False)
    type = relationship(Type)

    # Required fields
    drugbank_id = Column(String(255), nullable=False)
    # Optional fields are marked nullable explicitly
    cas_number = Column(String(255), nullable=True)
    name = Column(String(1024), nullable=False)
    description = Column(Text, nullable=True)
    inchi = Column(Text, nullable=True)
    inchikey = Column(String(255), nullable=True)

    def __repr__(self):
        """Return the name of this drug."""
        return self.name

    def as_bel(self) -> abundance:
        """Build a PyBEL abundance for this drug, identified by its DrugBank identifier."""
        return abundance(
            namespace=MODULE_NAME,
            name=self.name,
            identifier=self.drugbank_id,
        )

    def as_inchi_bel(self) -> abundance:
        """Build a PyBEL abundance for this drug, identified by its InChI string.

        The InChI string is used as both the name and the identifier.
        """
        # https://www.ebi.ac.uk/miriam/main/datatypes/MIR:00000383
        inchi = self.inchi
        return abundance(namespace='inchi', name=inchi, identifier=inchi)

    def as_inchikey_bel(self) -> abundance:
        """Build a PyBEL abundance for this drug, identified by its InChI-key.

        The InChI-key is used as both the name and the identifier.
        """
        # https://www.ebi.ac.uk/miriam/main/datatypes/MIR:00000387
        inchikey = self.inchikey
        return abundance(namespace='inchikey', name=inchikey, identifier=inchikey)

    def as_cas_bel(self) -> abundance:
        """Build a PyBEL abundance for this drug, identified by its CAS number.

        The CAS number is used as both the name and the identifier.
        """
        # https://www.ebi.ac.uk/miriam/main/datatypes/MIR:00000237
        cas_number = self.cas_number
        return abundance(namespace='cas', name=cas_number, identifier=cas_number)
class DrugXref(Base):
    """A cross-reference from a drug to an entry in another database."""

    __tablename__ = DRUG_XREF_TABLE_NAME

    # Surrogate primary key
    id = Column(Integer, primary_key=True)

    # The external resource and the identifier within that resource
    resource = Column(String(255), nullable=False)
    identifier = Column(String(255), nullable=False)

    # Owning drug; exposed on the drug side as ``Drug.xrefs``
    drug_id = Column(Integer, ForeignKey(f'{DRUG_TABLE_NAME}.id'), nullable=False)
    drug = relationship(Drug, backref=backref('xrefs'))

    def __repr__(self):
        """Return this cross-reference as ``resource:identifier``."""
        return f'{self.resource}:{self.identifier}'

    @classmethod
    def has_identifier(cls, resource, identifier):
        """Build a filter clause matching both the given resource and identifier.

        :param resource: The value to compare against the ``resource`` column
        :param identifier: The value to compare against the ``identifier`` column
        :return: The conjunction (``and_``) of the two equality comparisons
        """
        same_resource = cls.resource == resource
        same_identifier = cls.identifier == identifier
        return and_(same_resource, same_identifier)
class Patent(Base):
    """A patent that may be associated with one or more drugs."""

    __tablename__ = PATENT_TABLE_NAME

    # Surrogate primary key
    id = Column(Integer, primary_key=True)

    # The patent's own identifier; required, unique and indexed
    patent_id = Column(String(255), nullable=False, unique=True, index=True)
    country = Column(String(255), nullable=False)
    approved = Column(Date)
    expires = Column(Date)
    pediatric_extension = Column(Boolean)

    # Many-to-many link to drugs; both directions are dynamic (query) relationships
    drugs = relationship(
        Drug,
        secondary=drug_patent,
        lazy='dynamic',
        backref=backref('patents', lazy='dynamic'),
    )

    @staticmethod
    def filter_pk(country, patent_id):
        """Build a filter clause matching a patent by country and patent identifier.

        :param country: The value to compare against the ``country`` column
        :param patent_id: The value to compare against the ``patent_id`` column
        :return: The conjunction (``and_``) of the two equality comparisons
        """
        same_country = Patent.country == country
        same_patent_id = Patent.patent_id == patent_id
        return and_(same_country, same_patent_id)

    def to_json(self):
        """Return this patent as a dictionary.

        The two dates are passed through ``str``.
        """
        # NOTE(review): a missing date is rendered as the string 'None' because
        # of the str() call - confirm this is what consumers expect.
        return {
            'patent_id': self.patent_id,
            'country': self.country,
            'approved_date': str(self.approved),
            'expires_date': str(self.expires),
            'pediatric_extension': self.pediatric_extension,
        }

    @property
    def google_url(self) -> str:
        """Return the Google Patents URL of this patent.

        :raises KeyError: if this patent's country has no entry in ``PATENT_PREFIX_MAP``
        """
        prefix = PATENT_PREFIX_MAP[self.country]
        return f'https://patents.google.com/patent/{prefix}{self.patent_id}'

    def download_pdfs(self, outdir: str):
        """Pass this patent's Google Patents URL and ``outdir`` to ``download_google_patents``.

        :param outdir: Forwarded unchanged as the second argument
        :return: Whatever ``download_google_patents`` returns
        """
        url = self.google_url
        return download_google_patents(url, outdir)
class Alias(Base):
    """Represents an alias of a drug."""

    __tablename__ = ALIAS_TABLE_NAME

    # Surrogate primary key
    id = Column(Integer, primary_key=True)

    # The alias text; this column is not declared ``nullable=False``
    name = Column(Text)

    # Owning drug; exposed on the drug side as ``Drug.aliases``
    drug_id = Column(Integer, ForeignKey(f'{DRUG_TABLE_NAME}.id'), nullable=False)
    drug = relationship(Drug, backref=backref('aliases'))

    def __repr__(self):
        """Return the alias name as a string."""
        # ``name`` may be None, and ``__repr__`` must return a str or repr()
        # raises TypeError; str() leaves real names unchanged.
        return str(self.name)
class AtcCode(Base):
    """Represents an ATC code of a drug."""

    __tablename__ = ATC_TABLE_NAME

    # Surrogate primary key
    id = Column(Integer, primary_key=True)

    # The ATC code; this column is not declared ``nullable=False``
    name = Column(String(255))

    # Owning drug; exposed on the drug side as ``Drug.atc_codes``
    drug_id = Column(Integer, ForeignKey(f'{DRUG_TABLE_NAME}.id'), nullable=False)
    drug = relationship(Drug, backref=backref('atc_codes'))

    def __repr__(self):
        """Return the ATC code as a string."""
        # ``name`` may be None, and ``__repr__`` must return a str or repr()
        # raises TypeError; str() leaves real names unchanged.
        return str(self.name)
class Group(Base):
    """Represents a drug group."""

    __tablename__ = GROUP_TABLE_NAME

    # Surrogate primary key
    id = Column(Integer, primary_key=True)

    # The group name; this column is not declared ``nullable=False``
    name = Column(String(255))

    # Many-to-many link to drugs; exposed on the drug side as ``Drug.groups``.
    # Both directions are dynamic (query) relationships.
    drugs = relationship(
        Drug,
        secondary=drug_group,
        lazy='dynamic',
        backref=backref('groups', lazy='dynamic'),
    )

    def __repr__(self):
        """Return the group name as a string."""
        # ``name`` may be None, and ``__repr__`` must return a str or repr()
        # raises TypeError; str() leaves real names unchanged.
        return str(self.name)
class Category(Base):
    """Represents a drug category."""

    __tablename__ = CATEGORY_TABLE_NAME

    # Surrogate primary key
    id = Column(Integer, primary_key=True)

    # Neither column is declared ``nullable=False``
    name = Column(String(255))
    mesh_id = Column(String(32))

    # Many-to-many link to drugs; exposed on the drug side as ``Drug.categories``.
    # Both directions are dynamic (query) relationships.
    drugs = relationship(
        Drug,
        secondary=drug_category,
        lazy='dynamic',
        backref=backref('categories', lazy='dynamic'),
    )

    def __repr__(self):
        """Return the category name as a string."""
        # ``name`` may be None, and ``__repr__`` must return a str or repr()
        # raises TypeError; str() leaves real names unchanged.
        return str(self.name)
class Species(Base):
    """A species that proteins can be assigned to."""

    __tablename__ = SPECIES_TABLE_NAME

    # Surrogate primary key
    id = Column(Integer, primary_key=True)

    # Name of the species; required, unique and indexed
    name = Column(String(255), nullable=False, unique=True, index=True)

    def __repr__(self):
        """Return the name of this species."""
        return self.name
class Protein(Base):
    """Represents a protein."""

    __tablename__ = PROTEIN_TABLE_NAME

    # Surrogate primary key
    id = Column(Integer, primary_key=True)

    # Every protein must reference a row in the species table
    species_id = Column(Integer, ForeignKey(f'{SPECIES_TABLE_NAME}.id'), nullable=False)
    species = relationship(Species)

    # None of the identifier columns below is declared ``nullable=False``
    uniprot_id = Column(String(32))
    uniprot_accession = Column(String(32))
    name = Column(String(255))
    hgnc_id = Column(String(32))
    entrez_id = Column(String(32))

    def __repr__(self):
        """Return the UniProt identifier as a string."""
        # ``uniprot_id`` may be None, and ``__repr__`` must return a str or
        # repr() raises TypeError; str() leaves real identifiers unchanged.
        return str(self.uniprot_id)

    def as_bel_hgnc(self) -> protein:
        """Serialize as a PyBEL node with the HGNC namespace.

        The node is identified by ``hgnc_id``.
        """
        return protein(namespace='hgnc', identifier=self.hgnc_id)

    def as_bel(self) -> protein:
        """Serialize as a PyBEL node with the UniProt namespace.

        The node is identified by ``uniprot_id``.
        """
        return protein(namespace='uniprot', identifier=self.uniprot_id)
class Action(Base):
    """Represents the action a drug takes in a drug-protein interaction."""

    __tablename__ = ACTION_TABLE_NAME

    # Surrogate primary key
    id = Column(Integer, primary_key=True)

    # Name of the action; required, unique and indexed
    name = Column(String(255), unique=True, index=True, nullable=False)

    def __repr__(self):
        """Return the action name as a string."""
        # Added so this model prints like the other name-only models (Type,
        # Species); str() keeps repr() working if name has not been set yet.
        return str(self.name)
class Article(Base):
    """Represents an article in PubMed."""

    __tablename__ = ARTICLE_TABLE_NAME

    # Surrogate primary key
    id = Column(Integer, primary_key=True)

    # PubMed identifier stored as a string; required, unique and indexed
    pubmed_id = Column(String(255), unique=True, nullable=False, index=True)

    def __repr__(self):
        """Return the PubMed identifier as a string."""
        # Added so this model has a readable repr like the other models;
        # str() keeps repr() working if pubmed_id has not been set yet.
        return str(self.pubmed_id)
class DrugProteinInteraction(Base):
    """An interaction between a drug and a protein."""

    __tablename__ = DRUG_PROTEIN_TABLE_NAME

    # Surrogate primary key
    id = Column(Integer, primary_key=True)

    # The drug side; exposed on the drug as ``Drug.protein_interactions``
    drug_id = Column(Integer, ForeignKey(f'{DRUG_TABLE_NAME}.id'))
    drug = relationship(Drug, backref=backref('protein_interactions'))

    # The protein side; exposed on the protein as ``Protein.drug_interactions``
    protein_id = Column(Integer, ForeignKey(f'{PROTEIN_TABLE_NAME}.id'))
    protein = relationship(Protein, backref=backref('drug_interactions'))

    category = Column(String(32), nullable=False)  # target, enzyme, etc...
    known_action = Column(Boolean, nullable=False)

    # Many-to-many links; both directions are dynamic (query) relationships
    actions = relationship(
        Action,
        secondary=dpi_action,
        lazy='dynamic',
        backref=backref('drug_protein_interactions', lazy='dynamic'),
    )
    articles = relationship(
        Article,
        secondary=dpi_article,
        lazy='dynamic',
        backref=backref('drug_protein_interactions', lazy='dynamic'),
    )

    def _add_to_graph(self, graph: BELGraph, u: BaseEntity, v: BaseEntity) -> Set[str]:
        """Add one qualified edge from ``u`` to ``v`` per article of this interaction.

        Each edge uses the ``REGULATES`` relation, cites the article's PubMed
        identifier and carries an ``activity()`` object modifier. An
        interaction without articles adds no edges.

        :param graph: The graph the edges are added to
        :param u: The source node
        :param v: The target node
        :return: The set of values returned by ``graph.add_qualified_edge``
        """
        edge_keys = set()
        for article in self.articles:
            edge_key = graph.add_qualified_edge(
                u,
                v,
                relation=REGULATES,
                citation=article.pubmed_id,
                evidence='From DrugBank',
                annotations={
                    'bio2bel': MODULE_NAME,
                },
                object_modifier=activity(),
            )
            edge_keys.add(edge_key)
        return edge_keys

    def add_to_graph(self, graph: BELGraph) -> Set[str]:
        """Add this interaction to the graph.

        The drug and the protein are each converted with their ``as_bel``
        methods before the edges are added.

        :param graph: The graph to add this interaction to
        :return: A set of the hashes of the edges that were added
        """
        # TODO update implementation to use actions
        return self._add_to_graph(graph, self.drug.as_bel(), self.protein.as_bel())