# Source code for alphaviz.utils

import os
import numba
from numba import types
from numba.typed import Dict, List
from numba import njit
import numpy as np

# paths
# Directory containing this module; the resource folders below are all
# resolved relative to it.
BASE_PATH = os.path.dirname(__file__)
IMG_PATH = os.path.join(BASE_PATH, "img")
STYLE_PATH = os.path.join(BASE_PATH, "style")
DOCS_PATH = os.path.join(BASE_PATH, "docs")
DATA_PATH = os.path.join(BASE_PATH, "data")
MODELS_PATH = os.path.join(BASE_PATH, "models")
# GitHub page of the package's __init__.py; check_github_version() downloads
# it and scans it for the `__version__` line.
LATEST_GITHUB_INIT_FILE = "https://github.com/MannLabs/alphaviz/blob/main/alphaviz/__init__.py"


def check_analysis_file(file):
    """
    Placeholder for validating a preloaded analysis file.

    No checks are implemented yet: the argument is ignored and nothing is
    raised.

    Args:
        file: the analysis file to validate (currently unused).

    Returns:
        None.
    """
    # TODO: write the checks for the preloaded file and return the exception when the file can't be uploaded
    return None
# this code was taken from the AlphaTims Python package (https://github.com/MannLabs/alphatims/blob/master/alphatims/utils.py) and modified
def check_github_version(silent=False) -> str:
    """Checks and logs the current version of AlphaViz.

    Check if the local version equals the version found on the AlphaViz
    GitHub page referenced by ``LATEST_GITHUB_INIT_FILE``.
    This is only possible with an active internet connection and
    if no credentials are required for GitHub.

    Parameters
    ----------
    silent : bool
        If False, print the obtained conclusion (up-to-date or outdated).
        Default is False.

    Returns
    -------
    : str
        The version found on GitHub.
        "" if no version can be found on GitHub (the request failed, the
        page could not be parsed, or it contains no ``__version__`` line).
    """
    import requests
    from bs4 import BeautifulSoup
    import alphaviz

    try:
        # A timeout keeps a stalled connection from blocking the caller
        # indefinitely; requests has no default timeout.
        main_response = requests.get(LATEST_GITHUB_INIT_FILE, timeout=10)
        main_soup = BeautifulSoup(
            main_response.content.decode('utf-8'),
            'html.parser'
        )
        for line in main_soup.find_all(
            'td',
            class_='blob-code blob-code-inner js-file-line'
        ):
            if line.text.startswith('__version__'):
                # Last whitespace-separated token with its first and last
                # character removed (presumably the surrounding quotes).
                github_version = line.text.split()[-1].strip()[1:-1]
                if not silent:
                    if github_version != alphaviz.__version__:
                        print(
                            f"You are currently using AlphaViz version {alphaviz.__version__}. "
                            f"However, the latest version of AlphaViz on GitHub is {github_version}. "
                            "Checkout https://github.com/MannLabs/alphaviz.git for instructions on how to update AlphaViz..."
                        )
                    else:
                        print("Current AlphaViz version is up-to-date with GitHub.")
                return github_version
    except Exception:
        # Best-effort check: any failure (network, decoding, parsing) is
        # reported below. `Exception` rather than a bare `except` so that
        # KeyboardInterrupt / SystemExit still propagate.
        pass
    # Reached on failure and also when no `__version__` line was found, so
    # the function always returns a str as documented.
    print("Could not check GitHub for the latest AlphaViz release.")
    return ""
# This code was taken from the AlphaPept Python package (https://github.com/MannLabs/alphapept/blob/master/nbs/03_fasta.ipynb)
def get_mass_dict(
    modfile: str = "data/modifications.tsv",
    aasfile: str = "data/amino_acids.tsv",
    verbose: bool = True
):
    """
    Build a mass dictionary from two tab-separated files.

    This is used to create the hardcoded dict in the constants notebook.
    The dict needs to be hardcoded because of importing restrictions when
    using numba. More specifically, a global needs to be typed at runtime.

    Args:
        modfile (str): Filename of modifications file. It is read with the
            columns "Identifier", "Amino Acid Residue" and
            "Monoisotopic Mass Shift (Da)".
        aasfile (str): Filename of AAs file. It is read with the columns
            "Identifier" and "Monoisotopic Mass (Da)".
        verbose (bool, optional): Flag to print dict.

    Returns:
        Returns a numba compatible dictionary (str -> float64) with masses.

    Raises:
        FileNotFoundError: If files are not found.
    """
    import pandas as pd

    modifications = pd.read_csv(modfile, delimiter="\t")
    amino_acids = pd.read_csv(aasfile, delimiter="\t")

    mass_dict = Dict.empty(key_type=types.unicode_type, value_type=types.float64)

    # The amino-acid table is never modified, so its rows are extracted once
    # and reused for every modification below.
    aa_rows = amino_acids[["Identifier", "Monoisotopic Mass (Da)"]].values
    for aa_name, aa_mass in aa_rows:
        mass_dict[aa_name] = float(aa_mass)

    mod_rows = modifications[
        ["Identifier", "Amino Acid Residue", "Monoisotopic Mass Shift (Da)"]
    ].values
    for mod_name, residue, shift in mod_rows:
        has_angle_marker = ("<" in mod_name) or (">" in mod_name)
        if not has_angle_marker:
            mass_dict[mod_name] = float(shift) + mass_dict[residue]
            continue

        # Identifiers containing '<' or '>' are expanded per amino acid: the
        # last two characters are replaced by the amino-acid identifier.
        # With '^' in the identifier every amino acid is used, otherwise only
        # the one named in the "Amino Acid Residue" column.
        wildcard = '^' in mod_name
        for aa_name, _ in aa_rows:
            if wildcard or residue == aa_name:
                mass_dict[mod_name[:-2] + aa_name] = float(shift) + mass_dict[aa_name]

    # Manually add other masses
    for key, value in (
        ("Electron", 0.000548579909070),  # electron mass, half a millimass error if not taken into account
        ("Proton", 1.00727646687),  # proton mass
        ("Hydrogen", 1.00782503223),  # hydrogen mass
        ("C13", 13.003354835),  # C13 mass
        ("Oxygen", 15.994914619),  # oxygen mass
    ):
        mass_dict[key] = value

    mass_dict["OH"] = mass_dict["Oxygen"] + mass_dict["Hydrogen"]  # OH mass
    mass_dict["H2O"] = mass_dict["Oxygen"] + 2 * mass_dict["Hydrogen"]  # H2O mass

    # NOTE(review): 17.03052 looks like an average rather than a monoisotopic
    # mass for NH3 -- confirm before relying on it.
    for key, value in (
        ("NH3", 17.03052),
        ("delta_M", 1.00286864),
        ("delta_S", 0.0109135),
        ("a", 42.01056469),
    ):
        mass_dict[key] = value

    if verbose:
        for element in mass_dict:
            print(f'mass_dict["{element}"] = {mass_dict[element]}')

    return mass_dict
@njit
def parse(
    peptide: str
) -> List:
    """
    Parser to parse peptide strings.

    Everything after the first "_" is ignored. The remaining characters are
    accumulated into tokens; a token is closed whenever an uppercase
    character is reached, so lowercase characters end up as a prefix of the
    uppercase character that follows them. Characters after the last
    uppercase character are never emitted.

    Args:
        peptide (str): modified peptide sequence.

    Return:
        List (numba.typed.List): a list of amino acids and modified amino acids.
    """
    if "_" in peptide:
        peptide = peptide.split("_")[0]
    parsed = List()
    token = ""
    for ind, char in enumerate(peptide):
        token += char
        # A leading 'a' followed by another lowercase character is emitted
        # as its own token. The length check keeps the single-character
        # peptide "a" from indexing past the end of the string.
        if ind == 0 and char == 'a' and len(peptide) > 1 and peptide[1].islower():  # protein N-term modification
            parsed.append(token)
            token = ""
        if char.isupper():
            parsed.append(token)
            token = ""
    return parsed
@njit
def get_precmass(
    parsed_pep: list,
    mass_dict: numba.typed.Dict
) -> float:
    """
    Calculate the mass of the neutral precursor.

    The result is the "H2O" entry of `mass_dict` plus the mass of every
    element of `parsed_pep`.

    Args:
        parsed_pep (list or numba.typed.List of str): the list of amino acids and modified amino acids.
        mass_dict (numba.typed.Dict): key is the amino acid or the modified amino acid, and the value is the mass.

    Returns:
        float: the peptide neutral mass.
    """
    total = mass_dict["H2O"]
    for residue in parsed_pep:
        total = total + mass_dict[residue]
    return total
@njit
def get_fragmass(
    parsed_pep: list,
    mass_dict: numba.typed.Dict
) -> tuple:
    """
    Calculate the masses of the fragment ions.

    For a peptide of n residues, n - 1 b-ion masses are written first
    (cumulative from the start of `parsed_pep`, starting at the "Proton"
    mass), followed by n - 1 y-ion masses (cumulative from the end, starting
    at "Proton" + "H2O"). An empty or single-residue peptide yields two
    empty arrays.

    Args:
        parsed_pep (numba.typed.List of str): the list of amino acids and modified amino acids.
        mass_dict (numba.typed.Dict): key is the amino acid or the modified amino acid, and the value is the mass.

    Returns:
        Tuple[np.ndarray(np.float64), np.ndarray(np.int8)]: the fragment masses and the fragment types (represented as np.int8).
        For a fragment type, positive value means the b-ion, the value indicates the position (b1, b2, b3...); the negative value means the y-ion, the absolute value indicates the position (y1, y2, ...).
        Because the types are stored as np.int8, positions above 127 cannot be represented.
    """
    n_residues = len(parsed_pep)
    # Clamp at zero so that an empty peptide produces empty arrays instead of
    # requesting a negative array size.
    n_per_series = max(n_residues - 1, 0)
    n_frags = n_per_series * 2
    frag_masses = np.zeros(n_frags, dtype=np.float64)
    frag_type = np.zeros(n_frags, dtype=np.int8)

    # b-ions: every residue except the last one, in forward order.
    frag_m = mass_dict["Proton"]
    for idx in range(n_per_series):
        frag_m += mass_dict[parsed_pep[idx]]
        frag_masses[idx] = frag_m
        frag_type[idx] = idx + 1

    # y-ions: every residue except the first one, in reverse order.
    frag_m = mass_dict["Proton"] + mass_dict["H2O"]
    for idx in range(n_per_series):
        frag_m += mass_dict[parsed_pep[n_residues - 1 - idx]]
        frag_masses[n_per_series + idx] = frag_m
        frag_type[n_per_series + idx] = -(idx + 1)

    return frag_masses, frag_type
def get_frag_dict(
    parsed_pep: list,
    mass_dict: dict
) -> dict:
    """
    Calculate the masses of the fragment ions and label them.

    The masses and signed types come from `get_fragmass`; a positive type
    becomes a 'b' label and any other type becomes a 'y' label with the
    sign removed.

    Args:
        parsed_pep (list or numba.typed.List of str): the list of amino acids and modified amino acids.
        mass_dict (numba.typed.Dict): key is the amino acid or the modified amino acid, and the value is the mass.

    Returns:
        dict{str:float}: key is the fragment type (b1, b2, ..., y1, y2, ...), value is fragment mass.
    """
    frag_masses, frag_type = get_fragmass(parsed_pep, mass_dict)
    frag_dict = {}
    for mass, signed_pos in zip(frag_masses, frag_type):
        if signed_pos > 0:
            label = 'b' + str(signed_pos)
        else:
            label = 'y' + str(-signed_pos)
        frag_dict[label] = mass
    return frag_dict
@njit
def calculate_mass(
    mono_mz: float,
    charge: int
) -> float:
    """Calculate the precursor mass from mono mz and charge.

    Computed as ``mono_mz * abs(charge) - charge * M_PROTON``.

    Args:
        mono_mz (float): mono m/z.
        charge (int): charge.

    Returns:
        float: precursor mass.
    """
    M_PROTON = 1.00727646687
    return abs(charge) * mono_mz - M_PROTON * charge
@njit
def calculate_mz(
    prec_mass: float,
    charge: int
) -> float:
    """Calculate the precursor mono mz from mass and charge.

    This is the inverse of ``calculate_mass``: the proton mass is added for
    a positive charge and subtracted for a negative charge, so that
    ``calculate_mz(calculate_mass(mz, z), z)`` gives back ``mz`` for either
    polarity. For positive charges the result is unchanged from
    ``prec_mass / charge + M_PROTON``.

    Args:
        prec_mass (float): precursor mass.
        charge (int): charge (non-zero; a zero charge divides by zero).

    Returns:
        float: mono m/z.
    """
    M_PROTON = 1.00727646687
    # calculate_mass subtracts `charge * M_PROTON`, i.e. it adds the proton
    # mass back for negative charges; mirror that sign here.
    proton_sign = 1.0 if charge > 0 else -1.0
    mono_mz = prec_mass / abs(charge) + proton_sign * M_PROTON
    return mono_mz