API#

Init keggtools module

Models#

KEGG pathway models for object-relational parsing.

class keggtools.models.Relation(entry1: str, entry2: str, type: str)#

Relation model class.

__init__(entry1: str, entry2: str, type: str) None#

Init relation model instance.

Parameters:
  • entry1 (str) – Source entry of relation.

  • entry2 (str) – Destination entry of relation.

  • type (str) – Type of Relation. Must be contained in list of valid relation types.

static parse(item: Element) Relation#

Parse XML element into Relation instance.

Parameters:

item (xml.etree.ElementTree.Element) – XML element to parse.

Returns:

Relation instance.

Return type:

Relation

to_xml() Element#

Generate XML element from Relation instance.

Returns:

XML element.

Return type:

xml.etree.ElementTree.Element

class keggtools.models.Subtype(name: str, value: str)#

Subtype model class.

__init__(name: str, value: str) None#

Init Subtype model instance.

Parameters:
  • name (str) – Name of subtype. Must match list of valid subtypes.

  • value (str) – Value of subtype.

static parse(item: Element) Subtype#

Parse Subtype XML element.

Parameters:

item (xml.etree.ElementTree.Element) – XML element.

Returns:

Parsed Subtype instance.

Return type:

Subtype

to_xml() Element#

Generate XML element from Subtype instance.

Returns:

XML element.

Return type:

xml.etree.ElementTree.Element

class keggtools.models.Component(id: str)#

Component model.

__init__(id: str) None#

Init Component model.

Parameters:

id (str) – Id of component.

static parse(item: Element) Component#

Parsing ElementTree into Component.

Parameters:

item (xml.etree.ElementTree.Element) – XML element to parse.

Returns:

Parsed Component instance.

Return type:

Component

to_xml() Element#

Generate XML element from Component instance.

Returns:

XML element.

Return type:

xml.etree.ElementTree.Element

class keggtools.models.Graphics(x: str | None = None, y: str | None = None, width: str | None = None, height: str | None = None, coords: str | None = None, name: str | None = None, type: str | None = None, fgcolor: str | None = None, bgcolor: str | None = None)#

Graphics information for rendering.

__init__(x: str | None = None, y: str | None = None, width: str | None = None, height: str | None = None, coords: str | None = None, name: str | None = None, type: str | None = None, fgcolor: str | None = None, bgcolor: str | None = None) None#

Init Graphics model instance.

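Parameters:
  • x (Optional[str]) – X coordinate of the graphics object.

  • y (Optional[str]) – Y coordinate of the graphics object.

  • width (Optional[str]) – Width of the graphics object.

  • height (Optional[str]) – Height of the graphics object.

  • coords (Optional[str]) – Coordinates of the graphics object (used for line shapes in KGML).

  • name (Optional[str]) – Label of the graphics object.

  • type (Optional[str]) – Shape type of the graphics object.

  • fgcolor (Optional[str]) – Foreground color of the graphics object.

  • bgcolor (Optional[str]) – Background color of the graphics object.
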
static parse(item: Element) Graphics#

Parse XML element into Graphics instance.

Parameters:

item (xml.etree.ElementTree.Element) – XML element to parse.

Returns:

Parsed Graphics instance.

Return type:

Graphics

to_xml() Element#

Generate XML element from Graphics instance.

Returns:

XML element.

Return type:

xml.etree.ElementTree.Element

class keggtools.models.Entry(id: str, name: str, type: str, link: str | None = None, reaction: str | None = None)#

Entry model class.

__init__(id: str, name: str, type: str, link: str | None = None, reaction: str | None = None) None#

Init entry model instance.

Parameters:
  • id (str) – Id of Entry.

  • name (str) – Name of Entry.

  • type (str) – Type of Entry. Must be contained in list of valid entry types.

  • link (Optional[str]) – Link to KEGG database with reference to entry.

  • reaction (Optional[str]) – Reaction TODO: specify. Is str format correct?

property has_multiple_names: bool#

Checks if entry has multiple names that are space separated.

Returns:

Returns True if entry has multiple names.

Return type:

bool

static parse(item: Element) Entry#

Parsing XML element into Entry instance.

Parameters:

item (xml.etree.ElementTree.Element) – XML element to parse.

Returns:

Parsed Entry instance.

Return type:

Entry

to_xml() Element#

Generate XML element from Entry instance.

Returns:

XML element.

Return type:

xml.etree.ElementTree.Element

get_gene_id() List[str]#

Parse variable ‘name’ of Entry into KEGG id.

Returns:

List of KEGG identifier.

Return type:

List[str]

class keggtools.models.Pathway(name: str, org: str, number: str, title: str | None = None, image: str | None = None, link: str | None = None)#

KEGG Pathway object. The KEGG pathway object stores graphics information and related objects.

__init__(name: str, org: str, number: str, title: str | None = None, image: str | None = None, link: str | None = None) None#

Init KEGG Pathway model.

Parameters:
  • name (str) – Name of pathway, which is the full KEGG identifier.

  • org (str) – Organism code.

  • number (str) – Number of pathway.

  • title (Optional[str]) – Title of pathway.

  • image (Optional[str]) – Image for pathway provided by KEGG database.

  • link (Optional[str]) – Link to pathway in KEGG database.

static parse(data: Element | str) Pathway#

Parsing XML string or element into Pathway instance.

Parameters:

data (Union[xml.etree.ElementTree.Element, str]) – String or XML element to parse.

Returns:

Parsed Pathway instance.

Return type:

Pathway

to_xml() Element#

Generate XML element from Pathway instance and its children.

Returns:

XML element in KGML format.

Return type:

xml.etree.ElementTree.Element

to_xml_string() str#

Generate XML string from pathway instance.

Returns:

XML string in KGML format.

Return type:

str

get_entry_by_id(entry_id: str) Entry | None#

Get pathway Entry object by id.

Parameters:

entry_id (str) – Id of Entry.

Returns:

Returns Entry instance if id is found in Pathway. Otherwise returns None.

Return type:

Optional[Entry]

get_genes() List[str]#

List all genes from pathway.

Returns:

List of entry ids with type gene.

Return type:

List[str]

class keggtools.models.Reaction(id: str, name: str, type: str)#

Reaction model.

__init__(id: str, name: str, type: str) None#

Init Reaction model instance.

Parameters:
  • id (str) – Identifier of reaction.

  • name (str) – KEGG identifier of reaction.

  • type (str) – Type of reaction. Must be contained in list of valid reaction types.

static parse(item: Element) Reaction#

Parse XML element instance to Reaction model instance.

Parameters:

item (xml.etree.ElementTree.Element) – XML element to parse.

Returns:

Parsed Reaction model.

Return type:

Reaction

to_xml() Element#

Generate XML element from Reaction instance.

Returns:

XML element.

Return type:

xml.etree.ElementTree.Element

class keggtools.models.Product(id: str, name: str, alt: Alt | None = None)#

Reaction Product model.

__init__(id: str, name: str, alt: Alt | None = None) None#

Init Product instance.

Parameters:
  • id (str) – Identifier of Product in pathway.

  • name (str) – KEGG identifier of compound.

  • alt (Alt) – Alternative name of element.

static parse(item: Element) Product#

Parse XML element instance to Product model instance.

Parameters:

item (xml.etree.ElementTree.Element) – XML element to parse.

Returns:

Parsed Product model.

Return type:

Product

to_xml() Element#

Generate XML element from Product instance.

Returns:

XML element.

Return type:

xml.etree.ElementTree.Element

class keggtools.models.Substrate(id: str, name: str, alt: Alt | None = None)#

Reaction Substrate model.

__init__(id: str, name: str, alt: Alt | None = None) None#

Init Substrate instance.

Parameters:
  • id (str) – Identifier of Substrate in pathway.

  • name (str) – KEGG identifier of compound.

  • alt (Alt) – Alternative name of element.

static parse(item: Element) Substrate#

Parse XML element instance to Substrate model instance.

Parameters:

item (xml.etree.ElementTree.Element) – XML element to parse.

Returns:

Parsed Substrate model.

Return type:

Substrate

to_xml() Element#

Generate XML element from Substrate instance.

Returns:

XML element.

Return type:

xml.etree.ElementTree.Element

class keggtools.models.Alt(name: str)#

Alt model.

__init__(name: str) None#

Init Alt instance.

Parameters:

name (str) – Alt element name.

static parse(item: Element) Alt#

Parse Alt instance from XML element.

Parameters:

item (xml.etree.ElementTree.Element) – XML element to parse.

Return type:

Alt

Returns:

Parsed Alt element.

to_xml() Element#

Generate XML element from Alt instance.

Returns:

XML element.

Return type:

xml.etree.ElementTree.Element

Resolver#

Resolve requests to the KEGG data API.

class keggtools.resolver.Resolver(cache: Storage | str | None = None)#

KEGG pathway resolver class. Request interface for KEGG API endpoint.

__init__(cache: Storage | str | None = None) None#

Init Resolver instance.

Parameters:

cache (Optional[Union[Storage, str]]) – Directory to use as cache storage or Storage instance.

get_pathway_list(organism: str, **kwargs: Any) Dict[str, str]#

Request list of pathways linked to organism.

Parameters:
  • organism (str) – 3 letter organism code used by KEGG database.

  • kwargs (Any) – other arguments to requests.get.

Returns:

Dict in format {<pathway-id>: <name>}.

Return type:

Dict[str, str]

get_pathway(organism: str, code: str, **kwargs: Any) Pathway#

Load and parse KGML pathway by identifier.

Parameters:
  • organism (str) – 3 letter organism code used by KEGG database.

  • code (str) – Pathway identifier used by KEGG database.

  • kwargs (Any) – other arguments to requests.get.

Returns:

Returns parsed Pathway instance.

Return type:

Pathway

get_compounds(**kwargs: Any) Dict[str, str]#

Get dict of compounds. Request from KEGG API if not in cache.

Parameters:

kwargs (Any) – other arguments to requests.get.

Returns:

Dict of compound identifier to compound name.

Return type:

Dict[str, str]

get_organism_list(**kwargs: Any) Dict[str, str]#

Get organism codes from file or KEGG API.

Parameters:

kwargs (Any) – other arguments to requests.get.

Returns:

Dict with format {<org>: <org-name>}

Return type:

Dict[str, str]

check_organism(organism: str) bool#

Check if organism code exists.

Parameters:

organism (str) – 3 letter organism code used by KEGG database.

Returns:

Returns True if organism code is found in list of valid organisms.

Return type:

bool

keggtools.resolver.get_gene_names(genes: List[str], max_genes: int = 50) Dict[str, str]#

Resolve KEGG gene identifiers to names using the KEGG database REST API. Function is implemented outside the resolver instance, because requests are not cached and only gene identifiers are used.

Parameters:

genes (List[str]) – List of gene identifiers in format “<organism>:<code>”.

Returns:

Dict of gene identifier to gene name.

Return type:

Dict[str, str]

Storage#

Storage of KEGG data. Caching downloaded files from API to local file system.

class keggtools.storage.Storage(cachedir: str | None = None)#

Storage handler class.

__init__(cachedir: str | None = None) None#

Init KEGG data storage instance.

Parameters:

cachedir (Optional[str]) – Path to folder to use as cache.

check_cache_dir() None#

Checks if cache dir exists. Raises “NotADirectoryError” if caching folder is not found.

Raises:

NotADirectoryError – Error if cache folder does not exist.

build_cache_path(filename: str) str#

Build absolute filename for caching directory.

Parameters:

filename (str) – Name of file (is used as suffix to cache directory).

Returns:

Full filename which is inside cache folder.

Return type:

str

exist(filename: str) bool#

Check if filename exists in caching dir.

Parameters:

filename (str) – Filename to check.

Returns:

Returns True if file with given name exists in cachedir.

Return type:

bool

save(filename: str, data: str) str#

Save string as file in local storage. Returns absolute filename of saved file.

Parameters:
  • filename (str) – Filename to store file at.

  • data (str) – String data to save to cache file.

Returns:

Full filename to cached file.

Return type:

str

save_dump(filename: str, data: Any) str#

Save binary dump as file in local storage. Returns absolute filename of saved file.

Parameters:
  • filename (str) – Filename to store file at.

  • data (Any) – Data to store to cache file. Can be any object.

Returns:

Full filename to cached file.

Return type:

str

load(filename: str) str#

Load string from file.

Parameters:

filename (str) – Filename of file to load from cache folder.

Returns:

File content string.

Return type:

str

load_dump(filename: str) Any#

Load binary dump from file.

Parameters:

filename (str) – Filename of file to load from cache folder.

Returns:

Object from file.

Return type:

Any

Analysis#

KEGG Enrichment analysis core

class keggtools.analysis.Enrichment(pathways: List[Pathway])#

KEGG pathway enrichment analysis.

__init__(pathways: List[Pathway]) None#

Init KEGG pathway enrichment analysis.

Parameters:
  • org (str) – Organism identifier used by KEGG database (3 letter code, e.g. “mmu” for mus musculus or “hsa” for human).

  • pathways (List[Pathway]) – (Optional) List of Pathway instances or list of KEGG pathway identifier.

get_subset(subset: List[str], inplace: bool = False) List[EnrichmentResult]#

Create subset of analysis result by list of pathway ids.

Parameters:
  • subset (List[str]) – List of pathway identifiers to filter enrichment result by.

  • inplace (bool) – Update instance variable of enrichment result list and overwrite with generated subset.

Returns:

Subset of enrichment results.

Return type:

List[EnrichmentResult]

run_analysis(gene_list: List[str]) List[EnrichmentResult]#

Run enrichment analysis on a list of gene ids. Returns list of EnrichmentResult instances.

Parameters:

gene_list (List[str]) – List of genes to analyse.

Returns:

List of enrichment result instances.

Return type:

List[EnrichmentResult]

to_json() List[Dict[str, Any]]#

Export to json dict.

Return type:

List[Dict[str, Any]]

Returns:

Json dict of enrichment results.

to_csv(file_obj: str | IOBase | Any, delimiter: str = '\t', overwrite: bool = False) None#

Save result summary as file.

Parameters:
  • file_obj (Union[str, io.IOBase, Any]) – Filename as string or IOBase object.

  • delimiter (str) – Delimiter used for csv.

  • overwrite (bool) – Set to True to overwrite file, if already exist.

to_dataframe() Any#

Return analysis result as pandas DataFrame. Requires pandas dependency.

Returns:

Export enrichment results as pandas dataframe.

Return type:

pandas.DataFrame

class keggtools.analysis.EnrichmentResult(org: str, pathway_id: str, pathway_name: str, found_genes: list, pathway_genes: list, pathway_title: str | None = None)#

Results of KEGG pathway enrichment analysis.

__init__(org: str, pathway_id: str, pathway_name: str, found_genes: list, pathway_genes: list, pathway_title: str | None = None) None#

Init Result of KEGG pathway enrichment analysis.

Parameters:
  • org (str) – 3 letter code of organism used by KEGG database.

  • pathway_id (str) – Identifier of KEGG pathway.

  • pathway_name (str) – Name of KEGG pathway.

  • found_genes (list) – List of found genes.

  • pathway_genes (list) – List of all genes in pathway.

property pathway_genes_count: int#

Count of pathway genes.

Return type:

int

Returns:

Number of genes in pathway.

property study_count: int#

Count of study genes.

Return type:

int

Returns:

Number of genes found in analysis of pathway.

json_summary(gene_delimiter: str = ',') Dict[str, Any]#

Build json summary for enrichment analysis.

Parameters:

gene_delimiter (str) – Delimiter to separate genes in gene list.

Return type:

Dict[str, Any]

Returns:

Summary of enrichment result instance as dict.

static get_header() List[str]#

Build default header for enrichment analysis.

Return type:

List[str]

Returns:

List of header names as string.

Renderer#

Render object

class keggtools.render.Renderer(kegg_pathway: Pathway, gene_dict: Dict[str, float] | None = None, cache_or_resolver: Storage | str | Resolver | None = None, upper_color: Tuple[int, int, int] = (255, 0, 0), lower_color: Tuple[int, int, int] = (0, 0, 255))#

Renderer for KEGG Pathway.

__init__(kegg_pathway: Pathway, gene_dict: Dict[str, float] | None = None, cache_or_resolver: Storage | str | Resolver | None = None, upper_color: Tuple[int, int, int] = (255, 0, 0), lower_color: Tuple[int, int, int] = (0, 0, 255)) None#

Init Renderer instance for KEGG Pathway.

Parameters:
  • kegg_pathway (Pathway) – Pathway instance to render.

  • gene_dict (Optional[Dict[str, float]]) – Dict to specify overlay color gradient to rendered entries.

  • cache_or_resolver (Optional[Union[Storage, str, Resolver]]) – Specify cache for resolver instance or pass resolver. Resolver is needed to get compound data needed for rendering.

  • upper_color (Tuple[int, int, int]) – Color for upper bound of color gradient.

  • lower_color (Tuple[int, int, int]) – Color for lower bound of color gradient.

property cmap_upreg: List[str]#

Generated color map as list of hexadecimal strings for upregulated genes in gene dict.

property cmap_downreg: List[str]#

Generated color map as list of hexadecimal strings for downregulated genes in gene dict.

get_gene_color(gene_id: str, default_color: Tuple[int, int, int] = (255, 255, 255)) str#

Get overlay color for given gene.

Parameters:
  • gene_id (str) – Identifier of gene.

  • default_color (Tuple[int, int, int]) – Default color to return if gene is not found in gene_dict. Format in RGB tuple.

Returns:

Color of gene by expression level specified in gene_dict.

Return type:

str

render(display_unlabeled_genes: bool = True) None#

Render KEGG pathway.

Parameters:

display_unlabeled_genes (bool) – Entries in the KGML format can have space-separated entry names. Set this parameter to False to hide the entries.

to_string() str#

Convert pydot graph instance to dot string.

Returns:

Generated dot string of pathway.

Return type:

str

to_binary(extension: str) bytes#

Export pydot graph to binary data.

Parameters:

extension (str) – Extension of file to export. Use format string like “png”, “svg”, “pdf” or “jpeg”.

Returns:

File content as bytes object.

Return type:

bytes

Raises:

TypeError – If variable with generated dot graph is not type bytes.

to_file(filename: str, extension: str) None#

Export pydot graph to file.

Parameters:
  • filename (str) – Filename to save file at.

  • extension (str) – Extension of file to export. Use format string like “png”, “svg”, “pdf” or “jpeg”.

keggtools.render.generate_embedded_html_table(items: Dict[str, str], border: int = 0, cellborder: int = 1, truncate: int | None = None) str#

Generate HTML table to insert into label of dot node.

generate_embedded_html_table({“gene1”: “#ffffff”, “gene2”: “#454545”})

Parameters:
  • items (Dict[str, str]) – Items is a dict with format {name: hex_color}.

  • border (int) – Thickness of table border. (Default: 0)

  • cellborder (int) – Thickness of cell border within the table. (Default: 1)

  • truncate (Optional[int]) – Maximal number of items in table. Set to None to disable truncation. (Default: None)

Returns:

Returns html string of table.

Return type:

str

Utils#

Basic utils for HTTP requests, parsing and rendering

class keggtools.utils.ColorGradient(start: tuple, stop: tuple, steps: int = 100)#

Create color gradient.

__init__(start: tuple, stop: tuple, steps: int = 100) None#

Init ColorGradient instance.

Parameters:
  • start (tuple) – Color tuple

  • stop (tuple) – Color tuple

  • steps (int) – Number of steps.

static to_css(color: tuple) str#

Convert color tuple to CSS rgb color string.

Parameters:

color (tuple) – RGB color tuple containing 3 integers

Returns:

Color as CSS string (e.g. “rgb(0, 0, 0)”).

Return type:

str

static to_hex(color: tuple) str#

Convert color tuple to hex color string.

Parameters:

color (tuple) – RGB color tuple containing 3 integers.

Returns:

Hexadecimal color string (e.g. “#000000”).

Return type:

str

get_list() List[str]#

Get gradient color as list.

Returns:

Returns list of hexadecimal color strings with a gradient.

Return type:

List[str]

keggtools.utils.get_attribute(element: Element, key: str) str#

Get attribute from XML Element object. Raises ValueError if attribute is not found or not valid.

Parameters:
  • element (Element) – XML element to get attribute from.

  • key (str) – Name of attribute.

Returns:

Value of attribute.

Return type:

str

Raises:

ValueError – Error if attribute does not exist or is wrong type.

keggtools.utils.get_numeric_attribute(element: Element, key: str) str#

Get numeric attribute from XML Element object. Raises ValueError if attribute is not found or not valid.

Parameters:
  • element (Element) – XML element to get attribute from.

  • key (str) – Name of attribute.

Returns:

Value of attribute. ValueError is raised if value is not a numeric string.

Return type:

str

Raises:

ValueError – Error if attribute is not a digit (numeric string), does not exist or is wrong type.

keggtools.utils.parse_tsv(data: str) list#

Parse .tsv file from string.

Parameters:

data (str) – Tsv string to parse into list.

Returns:

List of items.

Return type:

list

keggtools.utils.parse_tsv_to_dict(data: str, col_keys: int = 0, col_values: int = 1) Dict[str, str]#

Parse .tsv file from string and build dict from two columns (by default the first two). Other columns are ignored.

Parameters:
  • data (str) – Tsv string to parse.

  • col_keys (int) – Number of column to parse as dict keys (0-index).

  • col_values (int) – Number of column to parse as dict values (0-index).

Returns:

Dict of two tsv columns.

Return type:

Dict[str, str]

keggtools.utils.parse_xml(xml_object_or_string: str | Element) Element#

Returns XML Element object from string or XML Element.

Parameters:

xml_object_or_string (Union[str, xml.etree.ElementTree.Element]) – Input parameter to check.

Returns:

XML element instance.

Return type:

xml.etree.ElementTree.Element

keggtools.utils.is_valid_hex_color(value: str) bool#

Check if string is a valid hex color.

Parameters:

value (str) – String value to check.

Returns:

Returns True if value is valid hexadecimal color string.

Return type:

bool

keggtools.utils.is_valid_pathway_name(value: str) bool#

Check if combined pathway identifier is valid. String must match “path:<org><number>”.

Parameters:

value (str) – String value to check.

Returns:

Returns True if value matches format of pathway name.

Return type:

bool

keggtools.utils.is_valid_pathway_number(value: str) bool#

Check if pathway number has correct 5 digit format.

Parameters:

value (str) – String value to check.

Returns:

Returns True if value has the correct format of pathway number.

Return type:

bool

keggtools.utils.is_valid_pathway_org(value: str) bool#

Check if organism identifier is valid.

Parameters:

value (str) – String value to check.

Returns:

Returns True if value is a valid organism code.

Return type:

bool

keggtools.utils.is_valid_gene_name(value: str) bool#

Check if gene identifier is valid. String must match “<org>:<number>”.

Parameters:

value (str) – String value to check.

Returns:

Returns True if value matches format of gene name.

Return type:

bool