Publications
Identifying Disease-Causing Mutations with Privacy Protection Bioinformatics, 2020. @article{pmid32683440, title = {Identifying Disease-Causing Mutations with Privacy Protection}, year = {2020}, date = {2020-07-01}, journal = {Bioinformatics}, abstract = {The use of genome data for diagnosis and treatment is becoming increasingly common. Researchers need access to as many genomes as possible to interpret the patient genome, to obtain some statistical patterns, and to reveal disease-gene relationships. The sensitive information contained in the genome data and the high risk of re-identification increase the privacy and security concerns associated with sharing such data. In this paper, we present an approach to identify disease-associated variants and genes while ensuring patient privacy. The proposed method uses secure multi-party computation to find disease-causing mutations under specific inheritance models without sacrificing the privacy of individuals. It discloses only variants or genes obtained as a result of the analysis. Thus, the vast majority of patient data can be kept private. Our prototype implementation performs analyses on thousands of genomic data in milliseconds, and the runtime scales logarithmically with the number of patients. We present the first inheritance model (recessive, dominant, compound heterozygous) based privacy-preserving analyses of genomic data in order to find disease-causing mutations. Furthermore, we reimplement the privacy-preserving methods (MAX, SETDIFF, and INTERSECTION) proposed in a previous study. Our MAX, SETDIFF, and INTERSECTION implementations are 2.5, 1122, and 341 times faster than the corresponding operations of the state-of-the-art protocol, respectively. https://gitlab.com/DIFUTURE/privacy-preserving-genomic-diagnosis. 
Supplementary data are available at Bioinformatics online.}, keywords = {}, pubstate = {published}, tppubtype = {article} } The use of genome data for diagnosis and treatment is becoming increasingly common. Researchers need access to as many genomes as possible to interpret the patient genome, to obtain some statistical patterns, and to reveal disease-gene relationships. The sensitive information contained in the genome data and the high risk of re-identification increase the privacy and security concerns associated with sharing such data. In this paper, we present an approach to identify disease-associated variants and genes while ensuring patient privacy. The proposed method uses secure multi-party computation to find disease-causing mutations under specific inheritance models without sacrificing the privacy of individuals. It discloses only variants or genes obtained as a result of the analysis. Thus, the vast majority of patient data can be kept private. Our prototype implementation performs analyses on thousands of genomic data in milliseconds, and the runtime scales logarithmically with the number of patients. We present the first inheritance model (recessive, dominant, compound heterozygous) based privacy-preserving analyses of genomic data in order to find disease-causing mutations. Furthermore, we reimplement the privacy-preserving methods (MAX, SETDIFF, and INTERSECTION) proposed in a previous study. Our MAX, SETDIFF, and INTERSECTION implementations are 2.5, 1122, and 341 times faster than the corresponding operations of the state-of-the-art protocol, respectively. https://gitlab.com/DIFUTURE/privacy-preserving-genomic-diagnosis. Supplementary data are available at Bioinformatics online. |
Gleim, L C; Karim, M R; Zimmermann, L; Kohlbacher, O; Stenzhorn, H; Decker, S; Beyan, O Enabling ad-hoc reuse of private data repositories through schema extraction J Biomed Semantics, 11 (1), pp. 6, 2020. @article{pmid32641124, title = {Enabling ad-hoc reuse of private data repositories through schema extraction}, author = {L C Gleim and M R Karim and L Zimmermann and O Kohlbacher and H Stenzhorn and S Decker and O Beyan}, year = {2020}, date = {2020-07-01}, journal = {J Biomed Semantics}, volume = {11}, number = {1}, pages = {6}, abstract = {Sharing sensitive data across organizational boundaries is often significantly limited by legal and ethical restrictions. Regulations such as the EU General Data Protection Rules (GDPR) impose strict requirements concerning the protection of personal and privacy sensitive data. Therefore new approaches, such as the Personal Health Train initiative, are emerging to utilize data right in their original repositories, circumventing the need to transfer data. Circumventing limitations of previous systems, this paper proposes a configurable and automated schema extraction and publishing approach, which enables ad-hoc SPARQL query formulation against RDF triple stores without requiring direct access to the private data. The approach is compatible with existing Semantic Web-based technologies and allows for the subsequent execution of such queries in a safe setting under the data provider's control. Evaluation with four distinct datasets shows that a configurable amount of concise and task-relevant schema, closely describing the structure of the underlying data, was derived, enabling the schema introspection-assisted authoring of SPARQL queries. Automatically extracting and publishing data schema can enable the introspection-assisted creation of data selection and integration queries. 
In conjunction with the presented system architecture, this approach can enable reuse of data from private repositories and in settings where agreeing upon a shared schema and encoding a priori is infeasible. As such, it could provide an important step towards reuse of data from previously inaccessible sources and thus towards the proliferation of data-driven methods in the biomedical domain.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Sharing sensitive data across organizational boundaries is often significantly limited by legal and ethical restrictions. Regulations such as the EU General Data Protection Rules (GDPR) impose strict requirements concerning the protection of personal and privacy sensitive data. Therefore new approaches, such as the Personal Health Train initiative, are emerging to utilize data right in their original repositories, circumventing the need to transfer data. Circumventing limitations of previous systems, this paper proposes a configurable and automated schema extraction and publishing approach, which enables ad-hoc SPARQL query formulation against RDF triple stores without requiring direct access to the private data. The approach is compatible with existing Semantic Web-based technologies and allows for the subsequent execution of such queries in a safe setting under the data provider's control. Evaluation with four distinct datasets shows that a configurable amount of concise and task-relevant schema, closely describing the structure of the underlying data, was derived, enabling the schema introspection-assisted authoring of SPARQL queries. Automatically extracting and publishing data schema can enable the introspection-assisted creation of data selection and integration queries. In conjunction with the presented system architecture, this approach can enable reuse of data from private repositories and in settings where agreeing upon a shared schema and encoding a priori is infeasible. 
As such, it could provide an important step towards reuse of data from previously inaccessible sources and thus towards the proliferation of data-driven methods in the biomedical domain. |
Starke, R; Oliphant, K; Jehmlich, N; Schäpe, S S; Sachsenberg, T; Kohlbacher, O; Allen-Vercoe, E; von Bergen, M Corrigendum to Tracing incorporation of heavy water into proteins for species-specific metabolic activity in complex communities J Proteomics, 224 , pp. 103829, 2020. (BibTeX) @article{pmid32467047, title = {Corrigendum to Tracing incorporation of heavy water into proteins for species-specific metabolic activity in complex communities}, author = {R Starke and K Oliphant and N Jehmlich and S S Schäpe and T Sachsenberg and O Kohlbacher and E Allen-Vercoe and M von Bergen}, year = {2020}, date = {2020-07-01}, journal = {J Proteomics}, volume = {224}, pages = {103829}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
Scheidt, T; Alka, O; Gonczarowska-Jorge, H; Gruber, W; Rathje, F; Dell'Aica, M; Rurik, M; Kohlbacher, O; Zahedi, R P; Aberger, F; Huber, C G Phosphoproteomics of short-term hedgehog signaling in human medulloblastoma cells Cell Commun. Signal, 18 (1), pp. 99, 2020. @article{pmid32576205, title = {Phosphoproteomics of short-term hedgehog signaling in human medulloblastoma cells}, author = {T Scheidt and O Alka and H Gonczarowska-Jorge and W Gruber and F Rathje and M Dell'Aica and M Rurik and O Kohlbacher and R P Zahedi and F Aberger and C G Huber}, year = {2020}, date = {2020-06-01}, journal = {Cell Commun. Signal}, volume = {18}, number = {1}, pages = {99}, abstract = {Aberrant hedgehog (HH) signaling is implicated in the development of various cancer entities such as medulloblastoma. Activation of GLI transcription factors was revealed as the driving force upon pathway activation. Increased phosphorylation of essential effectors such as Smoothened (SMO) and GLI proteins by kinases including Protein Kinase A, Casein Kinase 1, and Glycogen Synthase Kinase 3 β controls effector activity, stability and processing. However, a deeper and more comprehensive understanding of phosphorylation in the signal transduction remains unclear, particularly during early response processes involved in SMO activation and preceding GLI target gene regulation. We applied temporal quantitative phosphoproteomics to reveal phosphorylation dynamics underlying the short-term chemical activation and inhibition of early hedgehog signaling in HH responsive human medulloblastoma cells. Medulloblastoma cells were treated for 5.0 and 15 min with Smoothened Agonist (SAG) to induce and with vismodegib to inhibit the HH pathway. Our phosphoproteomic profiling resulted in the quantification of 7700 and 10,000 phosphosites after 5.0 and 15 min treatment, respectively. 
The data suggest a central role of phosphorylation in the regulation of ciliary assembly, trafficking, and signal transduction already after 5.0 min treatment. ERK/MAPK signaling, besides Protein Kinase A signaling and mTOR signaling, were differentially regulated after short-term treatment. Activation of Polo-like Kinase 1 and inhibition of Casein Kinase 2A1 were characteristic for vismodegib treatment, while SAG treatment induced Aurora Kinase A activity. Distinctive phosphorylation of central players of HH signaling such as SMO, SUFU, GLI2 and GLI3 was observed only after 15 min treatment. This study provides evidence that phosphorylation triggered in response to SMO modulation dictates the localization of hedgehog pathway components within the primary cilium and affects the regulation of the SMO-SUFU-GLI axis. The data are relevant for the development of targeted therapies of HH-associated cancers including sonic HH-type medulloblastoma. A deeper understanding of the mechanisms of action of SMO inhibitors such as vismodegib may lead to the development of compounds causing fewer adverse effects and lower frequencies of drug resistance. Video Abstract.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Aberrant hedgehog (HH) signaling is implicated in the development of various cancer entities such as medulloblastoma. Activation of GLI transcription factors was revealed as the driving force upon pathway activation. Increased phosphorylation of essential effectors such as Smoothened (SMO) and GLI proteins by kinases including Protein Kinase A, Casein Kinase 1, and Glycogen Synthase Kinase 3 β controls effector activity, stability and processing. However, a deeper and more comprehensive understanding of phosphorylation in the signal transduction remains unclear, particularly during early response processes involved in SMO activation and preceding GLI target gene regulation. 
We applied temporal quantitative phosphoproteomics to reveal phosphorylation dynamics underlying the short-term chemical activation and inhibition of early hedgehog signaling in HH responsive human medulloblastoma cells. Medulloblastoma cells were treated for 5.0 and 15 min with Smoothened Agonist (SAG) to induce and with vismodegib to inhibit the HH pathway. Our phosphoproteomic profiling resulted in the quantification of 7700 and 10,000 phosphosites after 5.0 and 15 min treatment, respectively. The data suggest a central role of phosphorylation in the regulation of ciliary assembly, trafficking, and signal transduction already after 5.0 min treatment. ERK/MAPK signaling, besides Protein Kinase A signaling and mTOR signaling, were differentially regulated after short-term treatment. Activation of Polo-like Kinase 1 and inhibition of Casein Kinase 2A1 were characteristic for vismodegib treatment, while SAG treatment induced Aurora Kinase A activity. Distinctive phosphorylation of central players of HH signaling such as SMO, SUFU, GLI2 and GLI3 was observed only after 15 min treatment. This study provides evidence that phosphorylation triggered in response to SMO modulation dictates the localization of hedgehog pathway components within the primary cilium and affects the regulation of the SMO-SUFU-GLI axis. The data are relevant for the development of targeted therapies of HH-associated cancers including sonic HH-type medulloblastoma. A deeper understanding of the mechanisms of action of SMO inhibitors such as vismodegib may lead to the development of compounds causing fewer adverse effects and lower frequencies of drug resistance. Video Abstract. |
Samonig, L; Loipetzberger, A; Blöchl, C; Rurik, M; Kohlbacher, O; Aberger, F; Huber, C G Proteins and Molecular Pathways Relevant for the Malignant Properties of Tumor-Initiating Pancreatic Cancer Cells Cells, 9 (6), 2020. @article{pmid32503348, title = {Proteins and Molecular Pathways Relevant for the Malignant Properties of Tumor-Initiating Pancreatic Cancer Cells}, author = {L Samonig and A Loipetzberger and C Blöchl and M Rurik and O Kohlbacher and F Aberger and C G Huber}, year = {2020}, date = {2020-06-01}, journal = {Cells}, volume = {9}, number = {6}, abstract = {Cancer stem cells (CSCs), a small subset of the tumor bulk with highly malignant properties, are deemed responsible for tumor initiation, growth, metastasis, and relapse. In order to reveal molecular markers and determinants of their tumor-initiating properties, we enriched rare stem-like pancreatic tumor-initiating cells (TICs) by harnessing their clonogenic growth capacity in three-dimensional multicellular spheroid cultures. We compared pancreatic TICs isolated from three-dimensional tumor spheroid cultures with nontumor-initiating cells (non-TICs) enriched in planar cultures. Employing differential proteomics (PTX), we identified more than 400 proteins with significantly different expression in pancreatic TICs and the non-TIC population. By combining the unbiased PTX with mRNA expression analysis and literature-based predictions of pro-malignant functions, we nominated the two calcium-binding proteins S100A8 (MRP8) and S100A9 (MRP14) as well as galactin-3-binding protein LGALS3BP (MAC-2-BP) as putative determinants of pancreatic TICs. In silico pathway analysis followed by candidate-based RNA interference mediated loss-of-function analysis revealed a critical role of S100A8, S100A9, and LGALS3BP as molecular determinants of TIC proliferation, migration, and in vivo tumor growth. 
Our study highlights the power of combining unbiased proteomics with focused gene expression and functional analyses for the identification of novel key regulators of TICs, an approach that warrants further application to identify proteins and pathways amenable to drug targeting.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Cancer stem cells (CSCs), a small subset of the tumor bulk with highly malignant properties, are deemed responsible for tumor initiation, growth, metastasis, and relapse. In order to reveal molecular markers and determinants of their tumor-initiating properties, we enriched rare stem-like pancreatic tumor-initiating cells (TICs) by harnessing their clonogenic growth capacity in three-dimensional multicellular spheroid cultures. We compared pancreatic TICs isolated from three-dimensional tumor spheroid cultures with nontumor-initiating cells (non-TICs) enriched in planar cultures. Employing differential proteomics (PTX), we identified more than 400 proteins with significantly different expression in pancreatic TICs and the non-TIC population. By combining the unbiased PTX with mRNA expression analysis and literature-based predictions of pro-malignant functions, we nominated the two calcium-binding proteins S100A8 (MRP8) and S100A9 (MRP14) as well as galactin-3-binding protein LGALS3BP (MAC-2-BP) as putative determinants of pancreatic TICs. In silico pathway analysis followed by candidate-based RNA interference mediated loss-of-function analysis revealed a critical role of S100A8, S100A9, and LGALS3BP as molecular determinants of TIC proliferation, migration, and in vivo tumor growth. Our study highlights the power of combining unbiased proteomics with focused gene expression and functional analyses for the identification of novel key regulators of TICs, an approach that warrants further application to identify proteins and pathways amenable to drug targeting. |
Starke, R; Oliphant, K; Jehmlich, N; Schäpe, S S; Sachsenberg, T; Kohlbacher, O; Allen-Vercoe, E; von Bergen, M Tracing incorporation of heavy water into proteins for species-specific metabolic activity in complex communities J Proteomics, 222 , pp. 103791, 2020. @article{pmid32335296, title = {Tracing incorporation of heavy water into proteins for species-specific metabolic activity in complex communities}, author = {R Starke and K Oliphant and N Jehmlich and S S Schäpe and T Sachsenberg and O Kohlbacher and E Allen-Vercoe and M von Bergen}, year = {2020}, date = {2020-06-01}, journal = {J Proteomics}, volume = {222}, pages = {103791}, abstract = {Stable isotope probing (SIP) approaches are a suitable tool to identify active organisms in bacterial communities, but adding isotopically labeled substrate can alter both the structure and the functionality of the community. Here, we validated and demonstrated a substrate-independent protein-SIP protocol using isotopically labeled water that captures the entire microbial activity of a community. We found that 18O yielded a higher incorporation rate into peptides and thus comprised a higher sensitivity. We then applied the method to an in vitro model of a human distal gut microbial ecosystem grown in two medium formulations, to evaluate changes in microbial activity between a high-fiber and high-protein diet. We showed that only little changes are seen in the community structure but the functionality varied between the diets. In conclusion, our approach can detect species-specific metabolic activity in complex bacterial communities and more specifically to quantify the amount of amino acid synthesis. Heavy water makes possible to analyze the activity of bacterial communities for which adding an isotopically labeled energy and nutrient sources is not easily feasible. SIGNIFICANCE: Heavy stable isotopes allow for the detection of active key players in complex ecosystems where many organisms are thought to be dormant. 
Opposed to the labelling with energy or nutrient sources, heavy water could be a suitable replacement to trace activity, which has been shown for DNA and RNA. Here we validate, quantify and compare the incorporation of heavy water either labeled with deuterium or 18‑oxygen into proteins of Escherichia coli K12 and of an in vitro model of a human gut microbial ecosystem. The significance of our research is in providing a freely available pipeline to analyze the incorporation of deuterium and 18‑oxygen into proteins together with the validation of the applicability of tracing heavy water as a proxy for activity. Our approach unveils the relative functional contribution of microbiota in complex ecosystems, which will improve our understanding of both animal- and environment-associated microbiomes and in vitro models.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Stable isotope probing (SIP) approaches are a suitable tool to identify active organisms in bacterial communities, but adding isotopically labeled substrate can alter both the structure and the functionality of the community. Here, we validated and demonstrated a substrate-independent protein-SIP protocol using isotopically labeled water that captures the entire microbial activity of a community. We found that 18O yielded a higher incorporation rate into peptides and thus comprised a higher sensitivity. We then applied the method to an in vitro model of a human distal gut microbial ecosystem grown in two medium formulations, to evaluate changes in microbial activity between a high-fiber and high-protein diet. We showed that only little changes are seen in the community structure but the functionality varied between the diets. In conclusion, our approach can detect species-specific metabolic activity in complex bacterial communities and more specifically to quantify the amount of amino acid synthesis. 
Heavy water makes possible to analyze the activity of bacterial communities for which adding an isotopically labeled energy and nutrient sources is not easily feasible. SIGNIFICANCE: Heavy stable isotopes allow for the detection of active key players in complex ecosystems where many organisms are thought to be dormant. Opposed to the labelling with energy or nutrient sources, heavy water could be a suitable replacement to trace activity, which has been shown for DNA and RNA. Here we validate, quantify and compare the incorporation of heavy water either labeled with deuterium or 18‑oxygen into proteins of Escherichia coli K12 and of an in vitro model of a human gut microbial ecosystem. The significance of our research is in providing a freely available pipeline to analyze the incorporation of deuterium and 18‑oxygen into proteins together with the validation of the applicability of tracing heavy water as a proxy for activity. Our approach unveils the relative functional contribution of microbiota in complex ecosystems, which will improve our understanding of both animal- and environment-associated microbiomes and in vitro models. |
Jeong, Kyowon; Kim, Jihyung; Gaikwad, Manasi; Hidayah, Siti Nurul; Heikaus, Laura; Schlüter, Hartmut; Kohlbacher, Oliver FLASHDeconv: Ultrafast, High-Quality Feature Deconvolution for Top-Down Proteomics Cell Systems, 10 (2), pp. 213-218, 2020, ISSN: 2405-4712. @article{FLASHDeconvCellSys2020, title = {FLASHDeconv: Ultrafast, High-Quality Feature Deconvolution for Top-Down Proteomics}, author = {Kyowon Jeong and Jihyung Kim and Manasi Gaikwad and Siti Nurul Hidayah and Laura Heikaus and Hartmut Schlüter and Oliver Kohlbacher}, url = {https://doi.org/10.1016/j.cels.2020.01.003}, doi = {10.1016/j.cels.2020.01.003}, issn = {2405-4712}, year = {2020}, date = {2020-02-26}, journal = {Cell Systems}, volume = {10}, number = {2}, pages = {213-218}, publisher = {Elsevier}, abstract = {Top-down mass spectrometry (TD-MS)-based proteomics analyzes intact proteoforms and thus preserves information about individual protein species. The MS signal of these high-mass analytes is complex and challenges the accurate determination of proteoform masses. Fast and accurate feature deconvolution (i.e., the determination of intact proteoform masses) is, therefore, an essential step for TD data analysis. Here, we present FLASHDeconv, an algorithm achieving higher deconvolution quality, with an execution speed two orders of magnitude faster than existing approaches. FLASHDeconv transforms peak positions (m/z) within spectra into log m/z space. This simple transformation turns the deconvolution problem into a search for constant patterns, thereby greatly accelerating the process. In both simple and complex samples, FLASHDeconv reports more genuine feature masses and substantially fewer artifacts than other existing methods. FLASHDeconv is freely available for download here: https://www.openms.org/flashdeconv/. 
A record of this paper's Transparent Peer Review process is included in the Supplemental Information.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Top-down mass spectrometry (TD-MS)-based proteomics analyzes intact proteoforms and thus preserves information about individual protein species. The MS signal of these high-mass analytes is complex and challenges the accurate determination of proteoform masses. Fast and accurate feature deconvolution (i.e., the determination of intact proteoform masses) is, therefore, an essential step for TD data analysis. Here, we present FLASHDeconv, an algorithm achieving higher deconvolution quality, with an execution speed two orders of magnitude faster than existing approaches. FLASHDeconv transforms peak positions (m/z) within spectra into log m/z space. This simple transformation turns the deconvolution problem into a search for constant patterns, thereby greatly accelerating the process. In both simple and complex samples, FLASHDeconv reports more genuine feature masses and substantially fewer artifacts than other existing methods. FLASHDeconv is freely available for download here: https://www.openms.org/flashdeconv/. A record of this paper's Transparent Peer Review process is included in the Supplemental Information. |
Wein, Samuel; Andrews, Byron; Sachsenberg, Timo; Santos-Rosa, Helena; Kohlbacher, Oliver; Kouzarides, Tony; Garcia, Benjamin; Weisser, Hendrik A computational platform for high-throughput analysis of RNA sequences and modifications by mass spectrometry Nat. Commun., 11 (1), pp. 926, 2020. @article{Wein_NASE_2020, title = {A computational platform for high-throughput analysis of RNA sequences and modifications by mass spectrometry}, author = {Samuel Wein and Byron Andrews and Timo Sachsenberg and Helena Santos-Rosa and Oliver Kohlbacher and Tony Kouzarides and Benjamin Garcia and Hendrik Weisser}, url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7026122/}, doi = {10.1038/s41467-020-14665-7}, year = {2020}, date = {2020-02-17}, journal = {Nat. Commun.}, volume = {11}, number = {1}, pages = {926}, abstract = {The field of epitranscriptomics continues to reveal how post-transcriptional modification of RNA affects a wide variety of biological phenomena. A pivotal challenge in this area is the identification of modified RNA residues within their sequence contexts. Mass spectrometry (MS) offers a comprehensive solution by using analogous approaches to shotgun proteomics. However, software support for the analysis of RNA MS data is inadequate at present and does not allow high-throughput processing. Existing software solutions lack the raw performance and statistical grounding to efficiently handle the numerous modifications found on RNA. We present a free and open-source database search engine for RNA MS data, called NucleicAcidSearchEngine (NASE), that addresses these shortcomings. We demonstrate the capability of NASE to reliably identify a wide range of modified RNA sequences in four original datasets of varying complexity. 
In human tRNA, we characterize over 20 different modification types simultaneously and find many cases of incomplete modification.}, keywords = {}, pubstate = {published}, tppubtype = {article} } The field of epitranscriptomics continues to reveal how post-transcriptional modification of RNA affects a wide variety of biological phenomena. A pivotal challenge in this area is the identification of modified RNA residues within their sequence contexts. Mass spectrometry (MS) offers a comprehensive solution by using analogous approaches to shotgun proteomics. However, software support for the analysis of RNA MS data is inadequate at present and does not allow high-throughput processing. Existing software solutions lack the raw performance and statistical grounding to efficiently handle the numerous modifications found on RNA. We present a free and open-source database search engine for RNA MS data, called NucleicAcidSearchEngine (NASE), that addresses these shortcomings. We demonstrate the capability of NASE to reliably identify a wide range of modified RNA sequences in four original datasets of varying complexity. In human tRNA, we characterize over 20 different modification types simultaneously and find many cases of incomplete modification. |
Pfeuffer, Julianus; Sachsenberg, Timo; Dijkstra, Tjeerd M. H.; Serang, Oliver; Reinert, Knut; Kohlbacher, Oliver EPIFANY - A method for efficient high-confidence protein inference J. Proteome Res., 2020. @article{EPIFANY2020, title = {EPIFANY - A method for efficient high-confidence protein inference}, author = {Pfeuffer, Julianus and Sachsenberg, Timo and Dijkstra, Tjeerd M. H. and Serang, Oliver and Reinert, Knut and Kohlbacher, Oliver}, url = {https://doi.org/10.1021/acs.jproteome.9b00566}, doi = {10.1021/acs.jproteome.9b00566}, year = {2020}, date = {2020-01-24}, journal = {J. Proteome Res.}, abstract = {Accurate protein inference under the presence of shared peptides is still one of the key problems in bottom-up proteomics. Most protein inference tools employing simple heuristic inference strategies are efficient, but exhibit reduced accuracy. More advanced probabilistic methods often exhibit better inference quality but tend to be too slow for large data sets. Here we present a novel protein inference method, EPIFANY, combining a loopy belief propagation algorithm with convolution trees for efficient processing of Bayesian networks. We demonstrate that EPIFANY combines the reliable protein inference of Bayesian methods with significantly shorter runtimes. On the 2016 iPRG protein inference benchmark data EPIFANY is the only tested method which finds all true-positive proteins at a 5% protein FDR without strict pre-filtering on PSM level, yielding an increase in identification performance (+10% in the number of true positives and +14% in partial AUC) compared to previous approaches. Even very large data sets with hundreds of thousands of spectra (which are intractable with other Bayesian and some non-Bayesian tools) can be processed with EPIFANY within minutes. The increased inference quality including shared peptides results in better protein inference results and thus increased robustness of the biological hypotheses generated. 
EPIFANY is available as open-source software for all major platforms at https://OpenMS.de/epifany. AB - Accurate protein inference under the presence of shared peptides is still one of the key problems in bottom-up proteomics. Most protein inference tools employing simple heuristic inference strategies are efficient, but exhibit reduced accuracy. More advanced probabilistic methods often exhibit better inference quality but tend to be too slow for large data sets. Here we present a novel protein inference method, EPIFANY, combining a loopy belief propagation algorithm with convolution trees for efficient processing of Bayesian networks. We demonstrate that EPIFANY combines the reliable protein inference of Bayesian methods with significantly shorter runtimes. On the 2016 iPRG protein inference benchmark data EPIFANY is the only tested method which finds all true-positive proteins at a 5% protein FDR without strict pre-filtering on PSM level, yielding an increase in identification performance (+10% in the number of true positives and +14% in partial AUC) compared to previous approaches. Even very large data sets with hundreds of thousands of spectra (which are intractable with other Bayesian and some non-Bayesian tools) can be processed with EPIFANY within minutes. The increased inference quality including shared peptides results in better protein inference results and thus increased robustness of the biological hypotheses generated. EPIFANY is available as open-source software for all major platforms at https://OpenMS.de/epifany.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Accurate protein inference under the presence of shared peptides is still one of the key problems in bottom-up proteomics. Most protein inference tools employing simple heuristic inference strategies are efficient, but exhibit reduced accuracy. More advanced probabilistic methods often exhibit better inference quality but tend to be too slow for large data sets. 
Here we present a novel protein inference method, EPIFANY, combining a loopy belief propagation algorithm with convolution trees for efficient processing of Bayesian networks. We demonstrate that EPIFANY combines the reliable protein inference of Bayesian methods with significantly shorter runtimes. On the 2016 iPRG protein inference benchmark data EPIFANY is the only tested method which finds all true-positive proteins at a 5% protein FDR without strict pre-filtering on PSM level, yielding an increase in identification performance (+10% in the number of true positives and +14% in partial AUC) compared to previous approaches. Even very large data sets with hundreds of thousands of spectra (which are intractable with other Bayesian and some non-Bayesian tools) can be processed with EPIFANY within minutes. The increased inference quality including shared peptides results in better protein inference results and thus increased robustness of the biological hypotheses generated. EPIFANY is available as open-source software for all major platforms at https://OpenMS.de/epifany. AB - Accurate protein inference under the presence of shared peptides is still one of the key problems in bottom-up proteomics. Most protein inference tools employing simple heuristic inference strategies are efficient, but exhibit reduced accuracy. More advanced probabilistic methods often exhibit better inference quality but tend to be too slow for large data sets. Here we present a novel protein inference method, EPIFANY, combining a loopy belief propagation algorithm with convolution trees for efficient processing of Bayesian networks. We demonstrate that EPIFANY combines the reliable protein inference of Bayesian methods with significantly shorter runtimes. 
On the 2016 iPRG protein inference benchmark data EPIFANY is the only tested method which finds all true-positive proteins at a 5% protein FDR without strict pre-filtering on PSM level, yielding an increase in identification performance (+10% in the number of true positives and +14% in partial AUC) compared to previous approaches. Even very large data sets with hundreds of thousands of spectra (which are intractable with other Bayesian and some non-Bayesian tools) can be processed with EPIFANY within minutes. The increased inference quality including shared peptides results in better protein inference results and thus increased robustness of the biological hypotheses generated. EPIFANY is available as open-source software for all major platforms at https://OpenMS.de/epifany. |
Hentrich, T; Koch, A; Weber, N; Kilzheimer, A; Burkhardt, S; Rall, K; Casadei, N; Kohlbacher, O; Riess, O; Schulze-Hentrich, JM; Brucker, SY The endometrial transcription landscape of MRKH syndrome bioRxiv, 2020. @article{Hentrich2020.02.18.954768, title = {The endometrial transcription landscape of MRKH syndrome}, author = {T Hentrich and A Koch and N Weber and A Kilzheimer and S Burkhardt and K Rall and N Casadei and O Kohlbacher and O Riess and JM Schulze-Hentrich and SY Brucker}, url = {https://www.biorxiv.org/content/early/2020/02/19/2020.02.18.954768}, doi = {10.1101/2020.02.18.954768}, year = {2020}, date = {2020-01-01}, journal = {bioRxiv}, publisher = {Cold Spring Harbor Laboratory}, abstract = {The Mayer-Rokitansky-Küster-Hauser (MRKH) syndrome (OMIM 277000) is characterized by agenesis of the uterus and upper part of the vagina in females with normal ovarian function. While genetic causes have been identified for a small subset of patients and epigenetic mechanisms presumably contribute to the pathogenic unfolding, too, the etiology of the syndrome has remained largely enigmatic. A comprehensive understanding of gene activity in the context of the disease is crucial to identify etiological components and their potential interplay. So far, this understanding is lacking, primarily due to the scarcity of samples and suitable tissue. In order to close this gap, we profiled endometrial tissue of uterus rudiments in a large cohort of MRKH patients using RNA-seq and thereby provide a genome-wide view on the altered transcription landscape of the MRKH syndrome. Differential and co-expression analyses of the data identified cellular processes and candidate genes that converge on a core network of interconnected regulators that emerge as pivotal for the perturbed expression space. 
With these results and browsable access to the rich data through an online tool we seek to accelerate research to unravel the underlying biology of this syndrome.}, keywords = {}, pubstate = {published}, tppubtype = {article} } The Mayer-Rokitansky-Küster-Hauser (MRKH) syndrome (OMIM 277000) is characterized by agenesis of the uterus and upper part of the vagina in females with normal ovarian function. While genetic causes have been identified for a small subset of patients and epigenetic mechanisms presumably contribute to the pathogenic unfolding, too, the etiology of the syndrome has remained largely enigmatic. A comprehensive understanding of gene activity in the context of the disease is crucial to identify etiological components and their potential interplay. So far, this understanding is lacking, primarily due to the scarcity of samples and suitable tissue. In order to close this gap, we profiled endometrial tissue of uterus rudiments in a large cohort of MRKH patients using RNA-seq and thereby provide a genome-wide view on the altered transcription landscape of the MRKH syndrome. Differential and co-expression analyses of the data identified cellular processes and candidate genes that converge on a core network of interconnected regulators that emerge as pivotal for the perturbed expression space. With these results and browsable access to the rich data through an online tool we seek to accelerate research to unravel the underlying biology of this syndrome. |
Voelkel, Gunnar; Fuerstberger, Axel; Schwab, Julian D; Kuehlwein, Silke D; Gscheidmeier, Thomas; Kraus, Johann M; Gross, Alexander; Kohlmayer, Florian; Kuhn, Peter; Kuhn, Klaus A; Kohlbacher, Oliver; Seufferlein, Thomas; Kestler, Hans A A secure and rapid query-software for COVID-19 test results that can easily be integrated into the clinical workflow to avoid communication overload medRxiv, 2020. @article{Voelkel2020.04.07.20056887, title = {A secure and rapid query-software for COVID-19 test results that can easily be integrated into the clinical workflow to avoid communication overload}, author = {Gunnar Voelkel and Axel Fuerstberger and Julian D Schwab and Silke D Kuehlwein and Thomas Gscheidmeier and Johann M Kraus and Alexander Gross and Florian Kohlmayer and Peter Kuhn and Klaus A Kuhn and Oliver Kohlbacher and Thomas Seufferlein and Hans A Kestler}, url = {https://www.medrxiv.org/content/early/2020/04/11/2020.04.07.20056887}, doi = {10.1101/2020.04.07.20056887}, year = {2020}, date = {2020-01-01}, journal = {medRxiv}, publisher = {Cold Spring Harbor Laboratory Press}, abstract = {Overcoming the COVID-19 crisis requires new ideas and strategies. Rapid testing of a large number of subjects is essential to monitor, and delay, the spread of SARS-CoV-2 to mitigate the consequences of the pandemic. People not knowing that they are infected may not stay in quarantine and, thus, are a risk for infecting others. Unfortunately, the massive number of COVID-19 tests performed is challenging for both laboratories and the units that take the throat swab and have to communicate test results. Here, we present a secure tracking system (CTest) to report COVID-19 test results online as soon as they become available. The system can be integrated into the clinical workflow with very modest effort and avoids excessive load to telephone hotlines. 
With this open-source and browser-based online tracking system, we aim to minimize the time required to inform the tested person but also the test units, e.g. hospitals or the public healthcare system. Instead of personal calls, CTest updates the status of the test automatically when the test results are available. Test reports are published on a secured web-page enabling regular status checks also by patients not using smartphones with dedicated mobile apps which has some importance as smartphone usage diminishes with age. The source code, as well as further information to integrate CTest into the IT environment of other clinics or test-centres, are freely available from https://github.com/sysbio-bioinf/CTest under the Eclipse Public License v2.0 (EPL2). Competing Interest Statement: The authors have declared no competing interest. Funding Statement: KAK, HAK and OK acknowledge funding from the German Federal Ministry of Education and Research (BMBF) as part of the DIFUTURE project (Medical Informatics Initiative, grant numbers 01ZZ1804I and 01ZZ1804D). OK and HAK acknowledge funding from the Ministry of Science and Art Baden-Württemberg (Zentrum für Innovative Versorgung, ZIV). OK and TS acknowledge funding from the Ministry of Social Affairs of the state of Baden-Württemberg (Zentren für Personalisierte Medizin, ZPM), HAK also acknowledges funding from the German Science Foundation (DFG, grant number 217328187). Author Declarations: All relevant ethical guidelines have been followed; any necessary IRB and/or ethics committee approvals have been obtained and details of the IRB/oversight body are included in the manuscript. Yes. All necessary patient/participant consent has been obtained and the appropriate institutional forms have been archived. Yes. I understand that all clinical trials and any other prospective interventional studies must be registered with an ICMJE-approved registry, such as ClinicalTrials.gov. 
I confirm that any such study reported in the manuscript has been registered and the trial registration ID is provided (note: if posting a prospective study registered retrospectively, please provide a statement in the trial ID field explaining why the study was not registered in advance). Yes. I have followed all appropriate research reporting guidelines and uploaded the relevant EQUATOR Network research reporting checklist(s) and other pertinent material as supplementary files, if applicable. Yes. A software is freely available from https://github.com/sysbio-bioinf/CTest under the Eclipse Public License v2.0 (EPL2). https://github.com/sysbio-bioinf/CTest}, keywords = {}, pubstate = {published}, tppubtype = {article} } Overcoming the COVID-19 crisis requires new ideas and strategies. Rapid testing of a large number of subjects is essential to monitor, and delay, the spread of SARS-CoV-2 to mitigate the consequences of the pandemic. People not knowing that they are infected may not stay in quarantine and, thus, are a risk for infecting others. Unfortunately, the massive number of COVID-19 tests performed is challenging for both laboratories and the units that take the throat swab and have to communicate test results. Here, we present a secure tracking system (CTest) to report COVID-19 test results online as soon as they become available. The system can be integrated into the clinical workflow with very modest effort and avoids excessive load to telephone hotlines. With this open-source and browser-based online tracking system, we aim to minimize the time required to inform the tested person but also the test units, e.g. hospitals or the public healthcare system. Instead of personal calls, CTest updates the status of the test automatically when the test results are available. 
Test reports are published on a secured web-page enabling regular status checks also by patients not using smartphones with dedicated mobile apps which has some importance as smartphone usage diminishes with age. The source code, as well as further information to integrate CTest into the IT environment of other clinics or test-centres, are freely available from https://github.com/sysbio-bioinf/CTest under the Eclipse Public License v2.0 (EPL2). Competing Interest Statement: The authors have declared no competing interest. Funding Statement: KAK, HAK and OK acknowledge funding from the German Federal Ministry of Education and Research (BMBF) as part of the DIFUTURE project (Medical Informatics Initiative, grant numbers 01ZZ1804I and 01ZZ1804D). OK and HAK acknowledge funding from the Ministry of Science and Art Baden-Württemberg (Zentrum für Innovative Versorgung, ZIV). OK and TS acknowledge funding from the Ministry of Social Affairs of the state of Baden-Württemberg (Zentren für Personalisierte Medizin, ZPM), HAK also acknowledges funding from the German Science Foundation (DFG, grant number 217328187). Author Declarations: All relevant ethical guidelines have been followed; any necessary IRB and/or ethics committee approvals have been obtained and details of the IRB/oversight body are included in the manuscript. Yes. All necessary patient/participant consent has been obtained and the appropriate institutional forms have been archived. Yes. I understand that all clinical trials and any other prospective interventional studies must be registered with an ICMJE-approved registry, such as ClinicalTrials.gov. 
I confirm that any such study reported in the manuscript has been registered and the trial registration ID is provided (note: if posting a prospective study registered retrospectively, please provide a statement in the trial ID field explaining why the study was not registered in advance). Yes. I have followed all appropriate research reporting guidelines and uploaded the relevant EQUATOR Network research reporting checklist(s) and other pertinent material as supplementary files, if applicable. Yes. A software is freely available from https://github.com/sysbio-bioinf/CTest under the Eclipse Public License v2.0 (EPL2). https://github.com/sysbio-bioinf/CTest |
Jeong, K; Kim, J; Gaikwad, M; Hidayah, S N; Heikaus, L; Schlüter, H; Kohlbacher, O FLASHDeconv: Ultrafast, High-Quality Feature Deconvolution for Top-Down Proteomics Cell Syst, 10 (2), pp. 213–218, 2020. @article{pmid32078799, title = {FLASHDeconv: Ultrafast, High-Quality Feature Deconvolution for Top-Down Proteomics}, author = {K Jeong and J Kim and M Gaikwad and S N Hidayah and L Heikaus and H Schlüter and O Kohlbacher}, year = {2020}, date = {2020-01-01}, journal = {Cell Syst}, volume = {10}, number = {2}, pages = {213--218}, abstract = {Top-down mass spectrometry (TD-MS)-based proteomics analyzes intact proteoforms and thus preserves information about individual protein species. The MS signal of these high-mass analytes is complex and challenges the accurate determination of proteoform masses. Fast and accurate feature deconvolution (i.e., the determination of intact proteoform masses) is, therefore, an essential step for TD data analysis. Here, we present FLASHDeconv, an algorithm achieving higher deconvolution quality, with an execution speed two orders of magnitude faster than existing approaches. FLASHDeconv transforms peak positions (m/z) within spectra into log m/z space. This simple transformation turns the deconvolution problem into a search for constant patterns, thereby greatly accelerating the process. In both simple and complex samples, FLASHDeconv reports more genuine feature masses and substantially fewer artifacts than other existing methods. FLASHDeconv is freely available for download here: https://www.openms.org/flashdeconv/. A record of this paper's Transparent Peer Review process is included in the Supplemental Information.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Top-down mass spectrometry (TD-MS)-based proteomics analyzes intact proteoforms and thus preserves information about individual protein species. 
The MS signal of these high-mass analytes is complex and challenges the accurate determination of proteoform masses. Fast and accurate feature deconvolution (i.e., the determination of intact proteoform masses) is, therefore, an essential step for TD data analysis. Here, we present FLASHDeconv, an algorithm achieving higher deconvolution quality, with an execution speed two orders of magnitude faster than existing approaches. FLASHDeconv transforms peak positions (m/z) within spectra into log m/z space. This simple transformation turns the deconvolution problem into a search for constant patterns, thereby greatly accelerating the process. In both simple and complex samples, FLASHDeconv reports more genuine feature masses and substantially fewer artifacts than other existing methods. FLASHDeconv is freely available for download here: https://www.openms.org/flashdeconv/. A record of this paper's Transparent Peer Review process is included in the Supplemental Information. |
Wein, S; Andrews, B; Sachsenberg, T; Santos-Rosa, H; Kohlbacher, O; Kouzarides, T; Garcia, B A; Weisser, H A computational platform for high-throughput analysis of RNA sequences and modifications by mass spectrometry Nat Commun, 11 (1), pp. 926, 2020. @article{pmid32066737, title = {A computational platform for high-throughput analysis of RNA sequences and modifications by mass spectrometry}, author = {S Wein and B Andrews and T Sachsenberg and H Santos-Rosa and O Kohlbacher and T Kouzarides and B A Garcia and H Weisser}, year = {2020}, date = {2020-01-01}, journal = {Nat Commun}, volume = {11}, number = {1}, pages = {926}, abstract = {The field of epitranscriptomics continues to reveal how post-transcriptional modification of RNA affects a wide variety of biological phenomena. A pivotal challenge in this area is the identification of modified RNA residues within their sequence contexts. Mass spectrometry (MS) offers a comprehensive solution by using analogous approaches to shotgun proteomics. However, software support for the analysis of RNA MS data is inadequate at present and does not allow high-throughput processing. Existing software solutions lack the raw performance and statistical grounding to efficiently handle the numerous modifications found on RNA. We present a free and open-source database search engine for RNA MS data, called NucleicAcidSearchEngine (NASE), that addresses these shortcomings. We demonstrate the capability of NASE to reliably identify a wide range of modified RNA sequences in four original datasets of varying complexity. In human tRNA, we characterize over 20 different modification types simultaneously and find many cases of incomplete modification.}, keywords = {}, pubstate = {published}, tppubtype = {article} } The field of epitranscriptomics continues to reveal how post-transcriptional modification of RNA affects a wide variety of biological phenomena. 
A pivotal challenge in this area is the identification of modified RNA residues within their sequence contexts. Mass spectrometry (MS) offers a comprehensive solution by using analogous approaches to shotgun proteomics. However, software support for the analysis of RNA MS data is inadequate at present and does not allow high-throughput processing. Existing software solutions lack the raw performance and statistical grounding to efficiently handle the numerous modifications found on RNA. We present a free and open-source database search engine for RNA MS data, called NucleicAcidSearchEngine (NASE), that addresses these shortcomings. We demonstrate the capability of NASE to reliably identify a wide range of modified RNA sequences in four original datasets of varying complexity. In human tRNA, we characterize over 20 different modification types simultaneously and find many cases of incomplete modification. |
Schulte-Schrepping, J; Reusch, N; Paclik, D; Baßler, K; Schlickeiser, S; Zhang, B; Krämer, B; Krammer, T; Brumhard, S; Bonaguro, L; De Domenico, E; Wendisch, D; Grasshoff, M; Kapellos, T S; Beckstette, M; Pecht, T; Saglam, A; Dietrich, O; Mei, H E; Schulz, A R; Conrad, C; Kunkel, D; Vafadarnejad, E; Xu, C J; Horne, A; Herbert, M; Drews, A; Thibeault, C; Pfeiffer, M; Hippenstiel, S; Hocke, A; Müller-Redetzky, H; Heim, K M; Machleidt, F; Uhrig, A; Bosquillon de Jarcy, L; Jürgens, L; Stegemann, M; Glösenkamp, C R; Volk, H D; Goffinet, C; Landthaler, M; Wyler, E; Georg, P; Schneider, M; Dang-Heine, C; Neuwinger, N; Kappert, K; Tauber, R; Corman, V; Raabe, J; Kaiser, K M; Vinh, M T; Rieke, G; Meisel, C; Ulas, T; Becker, M; Geffers, R; Witzenrath, M; Drosten, C; Suttorp, N; von Kalle, C; Kurth, F; Händler, K; Schultze, J L; Aschenbrenner, A C; Li, Y; Nattermann, J; Sawitzki, B; Saliba, A E; Sander, L E; Angelov, A; Bals, R; Bartholomäus, A; Becker, A; Bezdan, D; Bonifacio, E; Bork, P; Clavel, T; Colome-Tatche, M; Diefenbach, A; Dilthey, A; Fischer, N; Förstner, K; Frick, J S; Gagneur, J; Goesmann, A; Hain, T; Hummel, M; Janssen, S; Kalinowski, J; Kallies, R; Kehr, B; Keller, A; Kim-Hellmuth, S; Klein, C; Kohlbacher, O; Korbel, J O; Kurth, I; Landthaler, M; Li, Y; Ludwig, K; Makarewicz, O; Marz, M; McHardy, A; Mertes, C; Nöthen, M; Nürnberg, P; Ohler, U; Ossowski, S; Overmann, J; Peter, S; Pfeffer, K; Poetsch, A R; Pühler, A; Rajewsky, N; Ralser, M; Rieß, O; Ripke, S; Nunes da Rocha, U; Rosenstiel, P; Saliba, A E; Sander, L E; Sawitzki, B; Schiffer, P; Schulte, E C; Schultze, J L; Sczyrba, A; Stegle, O; Stoye, J; Theis, F; Vehreschild, J; Vogel, J; von Kleist, M; Walker, A; Walter, J; Wieczorek, D; Ziebuhr, J Severe COVID-19 Is Marked by a Dysregulated Myeloid Cell Compartment Cell, 182 (6), pp. 1419–1440, 2020. 
(BibTeX) @article{pmid32810438b, title = {Severe COVID-19 Is Marked by a Dysregulated Myeloid Cell Compartment}, author = {J Schulte-Schrepping and N Reusch and D Paclik and K Baßler and S Schlickeiser and B Zhang and B Krämer and T Krammer and S Brumhard and L Bonaguro and De Domenico, E and D Wendisch and M Grasshoff and T S Kapellos and M Beckstette and T Pecht and A Saglam and O Dietrich and H E Mei and A R Schulz and C Conrad and D Kunkel and E Vafadarnejad and C J Xu and A Horne and M Herbert and A Drews and C Thibeault and M Pfeiffer and S Hippenstiel and A Hocke and H Müller-Redetzky and K M Heim and F Machleidt and A Uhrig and Bosquillon de Jarcy, L and L Jürgens and M Stegemann and C R Glösenkamp and H D Volk and C Goffinet and M Landthaler and E Wyler and P Georg and M Schneider and C Dang-Heine and N Neuwinger and K Kappert and R Tauber and V Corman and J Raabe and K M Kaiser and M T Vinh and G Rieke and C Meisel and T Ulas and M Becker and R Geffers and M Witzenrath and C Drosten and N Suttorp and C von Kalle and F Kurth and K Händler and J L Schultze and A C Aschenbrenner and Y Li and J Nattermann and B Sawitzki and A E Saliba and L E Sander and A Angelov and R Bals and A Bartholomäus and A Becker and D Bezdan and E Bonifacio and P Bork and T Clavel and M Colome-Tatche and A Diefenbach and A Dilthey and N Fischer and K Förstner and J S Frick and J Gagneur and A Goesmann and T Hain and M Hummel and S Janssen and J Kalinowski and R Kallies and B Kehr and A Keller and S Kim-Hellmuth and C Klein and O Kohlbacher and J O Korbel and I Kurth and M Landthaler and Y Li and K Ludwig and O Makarewicz and M Marz and A McHardy and C Mertes and M Nöthen and P Nürnberg and U Ohler and S Ossowski and J Overmann and S Peter and K Pfeffer and A R Poetsch and A Pühler and N Rajewsky and M Ralser and O Rieß 
and S Ripke and Nunes da Rocha, U and P Rosenstiel and A E Saliba and L E Sander and B Sawitzki and P Schiffer and E C Schulte and J L Schultze and A Sczyrba and O Stegle and J Stoye and F Theis and J Vehreschild and J Vogel and M von Kleist and A Walker and J Walter and D Wieczorek and J Ziebuhr}, year = {2020}, date = {2020-01-01}, journal = {Cell}, volume = {182}, number = {6}, pages = {1419--1440}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
Wibberg, D; Batut, B; Belmann, P; Blom, J; Glöckner, FO; Grüning, B; Hoffmann, N; Kleinbölting, N; Rahn, R; Rey, M; Scholz, U; Sharan, M; Tauch, A; Trojahn, U; Usadel, B; Kohlbacher, O The de.NBI / ELIXIR-DE training platform - Bioinformatics training in Germany and across Europe within ELIXIR [version 2; peer review: 2 approved] F1000Research, 8 (1877), 2020. @article{10.12688/f1000research.20244.2, title = {The de.NBI / ELIXIR-DE training platform - Bioinformatics training in Germany and across Europe within ELIXIR [version 2; peer review: 2 approved]}, author = {D Wibberg and B Batut and P Belmann and J Blom and FO Glöckner and B Grüning and N Hoffmann and N Kleinbölting and R Rahn and M Rey and U Scholz and M Sharan and A Tauch and U Trojahn and B Usadel and O Kohlbacher}, doi = {10.12688/f1000research.20244.2}, year = {2020}, date = {2020-01-01}, journal = {F1000Research}, volume = {8}, number = {1877}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
Kutuzova, Svetlana; Colaianni, Pasquale; Röst, Hannes; Sachsenberg, Timo; Alka, Oliver; Kohlbacher, Oliver; Burla, Bo; Torta, Federico; Schrübbers, Lars; Kristensen, Mette; Nielsen, Lars; Herrgård, Markus J; McCloskey, Douglas SmartPeak automates targeted and quantitative metabolomics data processing 2020. (BibTeX) @unpublished{Kutuzova2020-ql, title = {SmartPeak automates targeted and quantitative metabolomics data processing}, author = {Svetlana Kutuzova and Pasquale Colaianni and Hannes Röst and Timo Sachsenberg and Oliver Alka and Oliver Kohlbacher and Bo Burla and Federico Torta and Lars Schrübbers and Mette Kristensen and Lars Nielsen and Markus J Herrgård and Douglas McCloskey}, year = {2020}, date = {2020-01-01}, journal = {Cold Spring Harbor Laboratory}, pages = {2020.07.14.202002}, keywords = {}, pubstate = {published}, tppubtype = {unpublished} } |
Bernardes, Joana P; Mishra, Neha; Tran, Florian; Bahmer, Thomas; Best, Lena; Blase, Johanna I; Bordoni, Dora; Franzenburg, Jeanette; Geisen, Ulf; Josephs-Spaulding, Jonathan; Köhler, Philipp; Künstner, Axel; Rosati, Elisa; Aschenbrenner, Anna C; Bacher, Petra; Baran, Nathan; Boysen, Teide; Brandt, Burkhard; Bruse, Niklas; Dörr, Jonathan; Dräger, Andreas; Elke, Gunnar; Ellinghaus, David; Fischer, Julia; Forster, Michael; Franke, Andre; Franzenburg, Sören; Frey, Norbert; Friedrichs, Anette; Fuß, Janina; Glück, Andreas; Hamm, Jacob; Hinrichsen, Finn; Hoeppner, Marc P; Imm, Simon; Junker, Ralf; Kaiser, Sina; Kan, Ying H; Knoll, Rainer; Lange, Christoph; Laue, Georg; Lier, Clemens; Lindner, Matthias; Marinos, Georgios; Markewitz, Robert; Nattermann, Jacob; Noth, Rainer; Pickkers, Peter; Rabe, Klaus F; Renz, Alina; Röcken, Christoph; Rupp, Jan; Schaffarzyk, Annika; Scheffold, Alexander; Schulte-Schrepping, Jonas; Schunk, Domagoj; Skowasch, Dirk; Ulas, Thomas; Wandinger, Klaus-Peter; Wittig, Michael; Zimmermann, Johannes; Busch, Hauke; Hoyer, Bimba F; Kaleta, Christoph; Heyckendorf, Jan; Kox, Matthijs; Rybniker, Jan; Schreiber, Stefan; Schultze, Joachim L; Rosenstiel, Philip; HCA Lung Biological Network; Deutsche COVID-19 Omics Initiative (DeCOI) Longitudinal Multi-omics Analyses Identify Responses of Megakaryocytes, Erythroid Cells, and Plasmablasts as Hallmarks of Severe COVID-19 Immunity, 2020. 
@article{Bernardes2020-um, title = {Longitudinal Multi-omics Analyses Identify Responses of Megakaryocytes, Erythroid Cells, and Plasmablasts as Hallmarks of Severe COVID-19}, author = {Joana P Bernardes and Neha Mishra and Florian Tran and Thomas Bahmer and Lena Best and Johanna I Blase and Dora Bordoni and Jeanette Franzenburg and Ulf Geisen and Jonathan Josephs-Spaulding and Philipp Köhler and Axel Künstner and Elisa Rosati and Anna C Aschenbrenner and Petra Bacher and Nathan Baran and Teide Boysen and Burkhard Brandt and Niklas Bruse and Jonathan Dörr and Andreas Dräger and Gunnar Elke and David Ellinghaus and Julia Fischer and Michael Forster and Andre Franke and Sören Franzenburg and Norbert Frey and Anette Friedrichs and Janina Fuß and Andreas Glück and Jacob Hamm and Finn Hinrichsen and Marc P Hoeppner and Simon Imm and Ralf Junker and Sina Kaiser and Ying H Kan and Rainer Knoll and Christoph Lange and Georg Laue and Clemens Lier and Matthias Lindner and Georgios Marinos and Robert Markewitz and Jacob Nattermann and Rainer Noth and Peter Pickkers and Klaus F Rabe and Alina Renz and Christoph Röcken and Jan Rupp and Annika Schaffarzyk and Alexander Scheffold and Jonas Schulte-Schrepping and Domagoj Schunk and Dirk Skowasch and Thomas Ulas and Klaus-Peter Wandinger and Michael Wittig and Johannes Zimmermann and Hauke Busch and Bimba F Hoyer and Christoph Kaleta and Jan Heyckendorf and Matthijs Kox and Jan Rybniker and Stefan Schreiber and Joachim L Schultze and Philip Rosenstiel and {HCA Lung Biological Network} and {Deutsche COVID-19 Omics Initiative (DeCOI)}}, year = {2020}, date = {2020-01-01}, journal = {Immunity}, abstract = {Temporal resolution of cellular features associated with a severe COVID-19 disease trajectory is needed for understanding skewed immune responses and defining predictors of outcome. Here, we performed a longitudinal multi-omics study using a two-center cohort of 14 patients. 
We analyzed the bulk transcriptome, bulk DNA methylome, and single-cell transcriptome (>358,000 cells, including BCR profiles) of peripheral blood samples harvested from up to 5 time points. Validation was performed in two independent cohorts of COVID-19 patients. Severe COVID-19 was characterized by an increase of proliferating, metabolically hyperactive plasmablasts. Coinciding with critical illness, we also identified an expansion of interferon-activated circulating megakaryocytes and increased erythropoiesis with features of hypoxic signaling. Megakaryocyte- and erythroid-cell-derived co-expression modules were predictive of fatal disease outcome. The study demonstrates broad cellular effects of SARS-CoV-2 infection beyond adaptive immune cells and provides an entry point toward developing biomarkers and targeted treatments of patients with COVID-19.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Temporal resolution of cellular features associated with a severe COVID-19 disease trajectory is needed for understanding skewed immune responses and defining predictors of outcome. Here, we performed a longitudinal multi-omics study using a two-center cohort of 14 patients. We analyzed the bulk transcriptome, bulk DNA methylome, and single-cell transcriptome (>358,000 cells, including BCR profiles) of peripheral blood samples harvested from up to 5 time points. Validation was performed in two independent cohorts of COVID-19 patients. Severe COVID-19 was characterized by an increase of proliferating, metabolically hyperactive plasmablasts. Coinciding with critical illness, we also identified an expansion of interferon-activated circulating megakaryocytes and increased erythropoiesis with features of hypoxic signaling. Megakaryocyte- and erythroid-cell-derived co-expression modules were predictive of fatal disease outcome. 
The study demonstrates broad cellular effects of SARS-CoV-2 infection beyond adaptive immune cells and provides an entry point toward developing biomarkers and targeted treatments of patients with COVID-19. |
Samonig, Lisa; Loipetzberger, Andrea; Blöchl, Constantin; Rurik, Marc; Kohlbacher, Oliver; Aberger, Fritz; Huber, Christian G Proteins and molecular pathways relevant for the malignant properties of tumor-initiating pancreatic cancer cells Cells, 9 (6), pp. 1397, 2020. @article{Samonig2020-qc, title = {Proteins and molecular pathways relevant for the malignant properties of tumor-initiating pancreatic cancer cells}, author = {Lisa Samonig and Andrea Loipetzberger and Constantin Blöchl and Marc Rurik and Oliver Kohlbacher and Fritz Aberger and Christian G Huber}, year = {2020}, date = {2020-01-01}, journal = {Cells}, volume = {9}, number = {6}, pages = {1397}, publisher = {MDPI AG}, abstract = {Cancer stem cells (CSCs), a small subset of the tumor bulk with highly malignant properties, are deemed responsible for tumor initiation, growth, metastasis, and relapse. In order to reveal molecular markers and determinants of their tumor-initiating properties, we enriched rare stem-like pancreatic tumor-initiating cells (TICs) by harnessing their clonogenic growth capacity in three-dimensional multicellular spheroid cultures. We compared pancreatic TICs isolated from three-dimensional tumor spheroid cultures with nontumor-initiating cells (non-TICs) enriched in planar cultures. Employing differential proteomics (PTX), we identified more than 400 proteins with significantly different expression in pancreatic TICs and the non-TIC population. By combining the unbiased PTX with mRNA expression analysis and literature-based predictions of pro-malignant functions, we nominated the two calcium-binding proteins S100A8 (MRP8) and S100A9 (MRP14) as well as galactin-3-binding protein LGALS3BP (MAC-2-BP) as putative determinants of pancreatic TICs. 
In silico pathway analysis followed by candidate-based RNA interference mediated loss-of-function analysis revealed a critical role of S100A8, S100A9, and LGALS3BP as molecular determinants of TIC proliferation, migration, and in vivo tumor growth. Our study highlights the power of combining unbiased proteomics with focused gene expression and functional analyses for the identification of novel key regulators of TICs, an approach that warrants further application to identify proteins and pathways amenable to drug targeting.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Cancer stem cells (CSCs), a small subset of the tumor bulk with highly malignant properties, are deemed responsible for tumor initiation, growth, metastasis, and relapse. In order to reveal molecular markers and determinants of their tumor-initiating properties, we enriched rare stem-like pancreatic tumor-initiating cells (TICs) by harnessing their clonogenic growth capacity in three-dimensional multicellular spheroid cultures. We compared pancreatic TICs isolated from three-dimensional tumor spheroid cultures with nontumor-initiating cells (non-TICs) enriched in planar cultures. Employing differential proteomics (PTX), we identified more than 400 proteins with significantly different expression in pancreatic TICs and the non-TIC population. By combining the unbiased PTX with mRNA expression analysis and literature-based predictions of pro-malignant functions, we nominated the two calcium-binding proteins S100A8 (MRP8) and S100A9 (MRP14) as well as galactin-3-binding protein LGALS3BP (MAC-2-BP) as putative determinants of pancreatic TICs. In silico pathway analysis followed by candidate-based RNA interference mediated loss-of-function analysis revealed a critical role of S100A8, S100A9, and LGALS3BP as molecular determinants of TIC proliferation, migration, and in vivo tumor growth. 
Our study highlights the power of combining unbiased proteomics with focused gene expression and functional analyses for the identification of novel key regulators of TICs, an approach that warrants further application to identify proteins and pathways amenable to drug targeting. |
Zhang, Lichao; Winkler, Sebastian; Schlottmann, Fabian P; Kohlbacher, Oliver; Elias, Josh E; Skotheim, Jan M; Ewald, Jennifer C Multiple Layers of Phospho-Regulation Coordinate Metabolism and the Cell Cycle in Budding Yeast Front. Cell Dev. Biol., 2019. @article{Zhang2020, title = {Multiple Layers of Phospho-Regulation Coordinate Metabolism and the Cell Cycle in Budding Yeast}, author = {Lichao Zhang and Sebastian Winkler and Fabian P. Schlottmann and Oliver Kohlbacher and Josh E. Elias and Jan M. Skotheim and Jennifer C. Ewald}, url = {https://doi.org/10.3389/fcell.2019.00338}, doi = {10.3389/fcell.2019.00338}, year = {2019}, date = {2019-12-17}, journal = {Front. Cell Dev. Biol.}, abstract = {The coordination of metabolism and growth with cell division is crucial for proliferation. While it has long been known that cell metabolism regulates the cell division cycle, it is becoming increasingly clear that the cell division cycle also regulates metabolism. In budding yeast, we previously showed that over half of all measured metabolites change concentration through the cell cycle indicating that metabolic fluxes are extensively regulated during cell cycle progression. However, how this regulation is achieved still remains poorly understood. Since both the cell cycle and metabolism are regulated to a large extent by protein phosphorylation, we here decided to measure the phosphoproteome through the budding yeast cell cycle. Specifically, we chose a cell cycle synchronization strategy that avoids stress and nutrient-related perturbations of metabolism, and we grew the yeast on ethanol minimal medium to force cells to utilize their full biosynthetic repertoire. Using a tandem-mass-tagging approach, we found over 200 sites on metabolic enzymes and transporters to be phospho-regulated. These sites were distributed among many pathways including carbohydrate catabolism, lipid metabolism, and amino acid synthesis and therefore likely contribute to changing metabolic fluxes through the cell cycle. 
Among all one thousand sites whose phosphorylation increases through the cell cycle, the CDK consensus motif and an arginine-directed motif were highly enriched. This arginine-directed R-R-x-S motif is associated with protein-kinase A, which regulates metabolism and promotes growth. Finally, we also found over one thousand sites that are dephosphorylated through the G1/S transition. We speculate that the phosphatase Glc7/PP1, known to regulate both the cell cycle and carbon metabolism, may play an important role because its regulatory subunits are phospho-regulated in our data. In summary, our results identify extensive cell cycle dependent phosphorylation and dephosphorylation of metabolic enzymes and suggest multiple mechanisms through which the cell division cycle regulates metabolic signaling pathways to temporally coordinate biosynthesis with distinct phases of the cell division cycle.}, keywords = {}, pubstate = {published}, tppubtype = {article} } The coordination of metabolism and growth with cell division is crucial for proliferation. While it has long been known that cell metabolism regulates the cell division cycle, it is becoming increasingly clear that the cell division cycle also regulates metabolism. In budding yeast, we previously showed that over half of all measured metabolites change concentration through the cell cycle indicating that metabolic fluxes are extensively regulated during cell cycle progression. However, how this regulation is achieved still remains poorly understood. Since both the cell cycle and metabolism are regulated to a large extent by protein phosphorylation, we here decided to measure the phosphoproteome through the budding yeast cell cycle. Specifically, we chose a cell cycle synchronization strategy that avoids stress and nutrient-related perturbations of metabolism, and we grew the yeast on ethanol minimal medium to force cells to utilize their full biosynthetic repertoire. 
Using a tandem-mass-tagging approach, we found over 200 sites on metabolic enzymes and transporters to be phospho-regulated. These sites were distributed among many pathways including carbohydrate catabolism, lipid metabolism, and amino acid synthesis and therefore likely contribute to changing metabolic fluxes through the cell cycle. Among all one thousand sites whose phosphorylation increases through the cell cycle, the CDK consensus motif and an arginine-directed motif were highly enriched. This arginine-directed R-R-x-S motif is associated with protein-kinase A, which regulates metabolism and promotes growth. Finally, we also found over one thousand sites that are dephosphorylated through the G1/S transition. We speculate that the phosphatase Glc7/PP1, known to regulate both the cell cycle and carbon metabolism, may play an important role because its regulatory subunits are phospho-regulated in our data. In summary, our results identify extensive cell cycle dependent phosphorylation and dephosphorylation of metabolic enzymes and suggest multiple mechanisms through which the cell division cycle regulates metabolic signaling pathways to temporally coordinate biosynthesis with distinct phases of the cell division cycle. |
Schneider, Lara; Kehl, Tim; Thedinga, Kristina; Grammes, Nadja L; Backes, Christina; Mohr, Christopher; Schubert, Benjamin; Lenhof, Kerstin; Gerstner, Nico; Hartkopf, Andreas Daniel; Wallwiener, Markus; Kohlbacher, Oliver; Keller, Andreas; Meese, Eckart; Graf, Norbert; Lenhof, Hans-Peter ClinOmicsTrailbc: a visual analytics tool for breast cancer treatment stratification Bioinformatics, 35 (24), pp. 5171-5181, 2019. @article{SchneiderClinOmics2019, title = {ClinOmicsTrailbc: a visual analytics tool for breast cancer treatment stratification}, author = {Lara Schneider and Tim Kehl and Kristina Thedinga and Nadja L. Grammes and Christina Backes and Christopher Mohr and Benjamin Schubert and Kerstin Lenhof and Nico Gerstner and Andreas Daniel Hartkopf and Markus Wallwiener and Oliver Kohlbacher and Andreas Keller and Eckart Meese and Norbert Graf and Hans-Peter Lenhof}, doi = {10.1093/bioinformatics/btz302}, year = {2019}, date = {2019-12-15}, journal = {Bioinformatics}, volume = {35}, number = {24}, pages = {5171-5181}, abstract = {Motivation Breast cancer is the second leading cause of cancer death among women. Tumors, even of the same histopathological subtype, exhibit a high genotypic diversity that impedes therapy stratification and that hence must be accounted for in the treatment decision-making process. Results Here, we present ClinOmicsTrailbc, a comprehensive visual analytics tool for breast cancer decision support that provides a holistic assessment of standard-of-care targeted drugs, candidates for drug repositioning and immunotherapeutic approaches. To this end, our tool analyzes and visualizes clinical markers and (epi-)genomics and transcriptomics datasets to identify and evaluate the tumor’s main driver mutations, the tumor mutational burden, activity patterns of core cancer-relevant pathways, drug-specific biomarkers, the status of molecular drug targets and pharmacogenomic influences. 
In order to demonstrate ClinOmicsTrailbc’s rich functionality, we present three case studies highlighting various ways in which ClinOmicsTrailbc can support breast cancer precision medicine. ClinOmicsTrailbc is a powerful integrated visual analytics tool for breast cancer research in general and for therapy stratification in particular, assisting oncologists to find the best possible treatment options for their breast cancer patients based on actionable, evidence-based results. Availability and implementation ClinOmicsTrailbc can be freely accessed at https://clinomicstrail.bioinf.uni-sb.de. Supplementary information Supplementary data are available at Bioinformatics online.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Motivation Breast cancer is the second leading cause of cancer death among women. Tumors, even of the same histopathological subtype, exhibit a high genotypic diversity that impedes therapy stratification and that hence must be accounted for in the treatment decision-making process. Results Here, we present ClinOmicsTrailbc, a comprehensive visual analytics tool for breast cancer decision support that provides a holistic assessment of standard-of-care targeted drugs, candidates for drug repositioning and immunotherapeutic approaches. To this end, our tool analyzes and visualizes clinical markers and (epi-)genomics and transcriptomics datasets to identify and evaluate the tumor’s main driver mutations, the tumor mutational burden, activity patterns of core cancer-relevant pathways, drug-specific biomarkers, the status of molecular drug targets and pharmacogenomic influences. In order to demonstrate ClinOmicsTrailbc’s rich functionality, we present three case studies highlighting various ways in which ClinOmicsTrailbc can support breast cancer precision medicine. 
ClinOmicsTrailbc is a powerful integrated visual analytics tool for breast cancer research in general and for therapy stratification in particular, assisting oncologists to find the best possible treatment options for their breast cancer patients based on actionable, evidence-based results. Availability and implementation ClinOmicsTrailbc can be freely accessed at https://clinomicstrail.bioinf.uni-sb.de. Supplementary information Supplementary data are available at Bioinformatics online. |
Schneider, L; Kehl, T; Thedinga, K; Grammes, N L; Backes, C; Mohr, C; Schubert, B; Lenhof, K; Gerstner, N; Hartkopf, A D; Wallwiener, M; Kohlbacher, O; Keller, A; Meese, E; Graf, N; Lenhof, H P ClinOmicsTrailbc: a visual analytics tool for breast cancer treatment stratification. Bioinformatics, 35 (24), pp. 5171-5181, 2019. @article{Schneider_2019, title = {ClinOmicsTrailbc: a visual analytics tool for breast cancer treatment stratification.}, author = {Schneider, L. and Kehl, T. and Thedinga, K. and Grammes, N. L. and Backes, C. and Mohr, C. and Schubert, B. and Lenhof, K. and Gerstner, N. and Hartkopf, A. D. and Wallwiener, M. and Kohlbacher, O. and Keller, A. and Meese, E. and Graf, N. and Lenhof, H. P.}, url = {https://academic.oup.com/bioinformatics/article/35/24/5171/5481954}, doi = {10.1093/bioinformatics/btz302}, year = {2019}, date = {2019-12-15}, journal = {Bioinformatics}, volume = {35}, number = {24}, pages = {5171-5181}, abstract = { Motivation: Breast cancer is the second leading cause of cancer death among women. Tumors, even of the same histopathological subtype, exhibit a high genotypic diversity that impedes therapy stratification and that hence must be accounted for in the treatment decision-making process. Results: Here, we present ClinOmicsTrailbc, a comprehensive visual analytics tool for breast cancer decision support that provides a holistic assessment of standard-of-care targeted drugs, candidates for drug repositioning and immunotherapeutic approaches. To this end, our tool analyzes and visualizes clinical markers and (epi-)genomics and transcriptomics datasets to identify and evaluate the tumor's main driver mutations, the tumor mutational burden, activity patterns of core cancer-relevant pathways, drug-specific biomarkers, the status of molecular drug targets and pharmacogenomic influences. 
In order to demonstrate ClinOmicsTrailbc's rich functionality, we present three case studies highlighting various ways in which ClinOmicsTrailbc can support breast cancer precision medicine. ClinOmicsTrailbc is a powerful integrated visual analytics tool for breast cancer research in general and for therapy stratification in particular, assisting oncologists to find the best possible treatment options for their breast cancer patients based on actionable, evidence-based results. Availability and implementation: ClinOmicsTrailbc can be freely accessed at https://clinomicstrail.bioinf.uni-sb.de. Supplementary information: Supplementary data are available at Bioinformatics online. }, keywords = {}, pubstate = {published}, tppubtype = {article} } Motivation: Breast cancer is the second leading cause of cancer death among women. Tumors, even of the same histopathological subtype, exhibit a high genotypic diversity that impedes therapy stratification and that hence must be accounted for in the treatment decision-making process. Results: Here, we present ClinOmicsTrailbc, a comprehensive visual analytics tool for breast cancer decision support that provides a holistic assessment of standard-of-care targeted drugs, candidates for drug repositioning and immunotherapeutic approaches. To this end, our tool analyzes and visualizes clinical markers and (epi-)genomics and transcriptomics datasets to identify and evaluate the tumor's main driver mutations, the tumor mutational burden, activity patterns of core cancer-relevant pathways, drug-specific biomarkers, the status of molecular drug targets and pharmacogenomic influences. In order to demonstrate ClinOmicsTrailbc's rich functionality, we present three case studies highlighting various ways in which ClinOmicsTrailbc can support breast cancer precision medicine. 
ClinOmicsTrailbc is a powerful integrated visual analytics tool for breast cancer research in general and for therapy stratification in particular, assisting oncologists to find the best possible treatment options for their breast cancer patients based on actionable, evidence-based results. Availability and implementation: ClinOmicsTrailbc can be freely accessed at https://clinomicstrail.bioinf.uni-sb.de. Supplementary information: Supplementary data are available at Bioinformatics online. |
Lederer, S; Dijkstra, T M H; Heskes, T Additive Dose Response Models: Defining Synergy Frontiers in Pharmacology, 10 (1384), 2019. @article{Lederer_2019, title = {Additive Dose Response Models: Defining Synergy}, author = {Lederer, S. and Dijkstra, T. M. H. and Heskes, T.}, url = {https://www.frontiersin.org/articles/10.3389/fphar.2019.01384/full}, doi = {10.3389/fphar.2019.01384}, year = {2019}, date = {2019-11-26}, journal = {Frontiers in Pharmacology}, volume = {10}, number = {1384}, abstract = {In synergy studies, one focuses on compound combinations that promise a synergistic or antagonistic effect. With the help of high-throughput techniques, a huge amount of compound combinations can be screened and filtered for suitable candidates for a more detailed analysis. Those promising candidates are chosen based on the deviance between a measured response and an expected non-interactive response. A non-interactive response is based on a principle of no interaction, such as Loewe Additivity or Bliss Independence. In a previous study, we introduced, an explicit formulation of the hitherto implicitly defined Loewe Additivity, the so-called Explicit Mean Equation. In the current study we show that this Explicit Mean Equation outperforms the original implicit formulation of Loewe Additivity and Bliss Independence when measuring synergy in terms of the deviance between measured and expected response, called the lack-of-fit. Further, we show that computing synergy as lack-of-fit outperforms a parametric approach. We show this on two datasets of compound combinations that are categorized into synergistic, non-interactive, and antagonistic.}, keywords = {}, pubstate = {published}, tppubtype = {article} } In synergy studies, one focuses on compound combinations that promise a synergistic or antagonistic effect. 
With the help of high-throughput techniques, a huge amount of compound combinations can be screened and filtered for suitable candidates for a more detailed analysis. Those promising candidates are chosen based on the deviance between a measured response and an expected non-interactive response. A non-interactive response is based on a principle of no interaction, such as Loewe Additivity or Bliss Independence. In a previous study, we introduced, an explicit formulation of the hitherto implicitly defined Loewe Additivity, the so-called Explicit Mean Equation. In the current study we show that this Explicit Mean Equation outperforms the original implicit formulation of Loewe Additivity and Bliss Independence when measuring synergy in terms of the deviance between measured and expected response, called the lack-of-fit. Further, we show that computing synergy as lack-of-fit outperforms a parametric approach. We show this on two datasets of compound combinations that are categorized into synergistic, non-interactive, and antagonistic. |
Richter, Bernadette; Rurik, Marc; Gurk, Stephanie; Kohlbacher, Oliver; Fischer, Markus Food monitoring: Screening of the geographical origin of white asparagus using FT-NIR and machine learning Food Control, 104 , pp. 318-325, 2019. @article{Richter_2019, title = {Food monitoring: Screening of the geographical origin of white asparagus using FT-NIR and machine learning}, author = {Bernadette Richter and Marc Rurik and Stephanie Gurk and Oliver Kohlbacher and Markus Fischer }, url = {https://doi.org/10.1016/j.foodcont.2019.04.032}, doi = {10.1016/j.foodcont.2019.04.032}, year = {2019}, date = {2019-10-01}, journal = {Food Control}, volume = {104}, pages = {318-325}, abstract = {The aim of this study was to experimentally monitor the geographical origin of white asparagus based on near-infrared spectroscopy (NIR). 275 asparagus samples from six countries of origin and three years of harvest were analyzed. Support vector machine (SVM) classifiers were trained to predict the geographical origin and validated using nested cross-validation. When coupled with feature selection, a linear SVM was able to predict the country of origin with an accuracy of 89%. Confidence estimation based on posterior class probabilities can be used to exclude unreliable classifications leading to an accuracy up to 97%. These results demonstrate the potential of NIR spectroscopy combined with machine learning methods as a screening technique for provenance distinction of asparagus.}, keywords = {}, pubstate = {published}, tppubtype = {article} } The aim of this study was to experimentally monitor the geographical origin of white asparagus based on near-infrared spectroscopy (NIR). 275 asparagus samples from six countries of origin and three years of harvest were analyzed. Support vector machine (SVM) classifiers were trained to predict the geographical origin and validated using nested cross-validation. 
When coupled with feature selection, a linear SVM was able to predict the country of origin with an accuracy of 89%. Confidence estimation based on posterior class probabilities can be used to exclude unreliable classifications leading to an accuracy up to 97%. These results demonstrate the potential of NIR spectroscopy combined with machine learning methods as a screening technique for provenance distinction of asparagus. |
Richter, B; Rurik, M; Gurk, S; Kohlbacher, O; Fischer, M Food monitoring: Screening of the geographical origin of white asparagus using FT-NIR and machine learning Food Control, 104 , pp. 318-325, 2019. @article{Richter_2019b, title = {Food monitoring: Screening of the geographical origin of white asparagus using FT-NIR and machine learning}, author = {Richter, B. and Rurik, M. and Gurk, S. and Kohlbacher, O. and Fischer, M.}, url = {https://doi.org/10.1016/j.foodcont.2019.04.032}, doi = {10.1016/j.foodcont.2019.04.032}, year = {2019}, date = {2019-10-01}, journal = {Food Control}, volume = {104}, pages = {318-325}, abstract = {The aim of this study was to experimentally monitor the geographical origin of white asparagus based on near-infrared spectroscopy (NIR). 275 asparagus samples from six countries of origin and three years of harvest were analyzed. Support vector machine (SVM) classifiers were trained to predict the geographical origin and validated using nested cross-validation. When coupled with feature selection, a linear SVM was able to predict the country of origin with an accuracy of 89%. Confidence estimation based on posterior class probabilities can be used to exclude unreliable classifications leading to an accuracy up to 97%. These results demonstrate the potential of NIR spectroscopy combined with machine learning methods as a screening technique for provenance distinction of asparagus.}, keywords = {}, pubstate = {published}, tppubtype = {article} } The aim of this study was to experimentally monitor the geographical origin of white asparagus based on near-infrared spectroscopy (NIR). 275 asparagus samples from six countries of origin and three years of harvest were analyzed. Support vector machine (SVM) classifiers were trained to predict the geographical origin and validated using nested cross-validation. When coupled with feature selection, a linear SVM was able to predict the country of origin with an accuracy of 89%. 
Confidence estimation based on posterior class probabilities can be used to exclude unreliable classifications leading to an accuracy up to 97%. These results demonstrate the potential of NIR spectroscopy combined with machine learning methods as a screening technique for provenance distinction of asparagus. |
Choobdar, Sarvenaz; Ahsen, Mehmet E; Crawford, Jake; Tomasoni, Mattia; Fang, Tao; Lamparter, David; Lin, Junyuan; Hescott, Benjamin; Hu, Xiaozhe; Mercer, Johnathan; Natoli, Ted; Narayan, Rajiv; Subramanian, Aravind; Zhang, Jitao D; Stolovitzky, Gustavo; Kutalik, Zoltán; Lage, Kasper; Slonim, Donna K; Saez-Rodriguez, Julio; Cowen, Lenore J; Bergmann, Sven; Marbach, Daniel Assessment of network module identification across complex diseases Nat. Methods, 16 (9), pp. 843–852, 2019. @article{Choobdar265553b, title = {Assessment of network module identification across complex diseases}, author = {Sarvenaz Choobdar and Mehmet E Ahsen and Jake Crawford and Mattia Tomasoni and Tao Fang and David Lamparter and Junyuan Lin and Benjamin Hescott and Xiaozhe Hu and Johnathan Mercer and Ted Natoli and Rajiv Narayan and Aravind Subramanian and Jitao D Zhang and Gustavo Stolovitzky and Zoltán Kutalik and Kasper Lage and Donna K Slonim and Julio Saez-Rodriguez and Lenore J Cowen and Sven Bergmann and Daniel Marbach}, url = {https://www.nature.com/articles/s41592-019-0509-5.pdf}, doi = {10.1038/s41592-019-0509-5}, year = {2019}, date = {2019-09-01}, journal = {Nat. Methods}, volume = {16}, number = {9}, pages = {843–852}, publisher = {Cold Spring Harbor Laboratory}, abstract = {Identification of modules in molecular networks is at the core of many current analysis methods in biomedical research. However, how well different approaches identify disease-relevant modules in different types of gene and protein networks remains poorly understood. We launched the “Disease Module Identification DREAM Challenge”, an open competition to comprehensively assess module identification methods across diverse protein-protein interaction, signaling, gene co-expression, homology, and cancer-gene networks. Predicted network modules were tested for association with complex traits and diseases using a unique collection of 180 genome-wide association studies (GWAS). 
Our critical assessment of 75 contributed module identification methods reveals novel top-performing algorithms, which recover complementary trait-associated modules. We find that most of these modules correspond to core disease-relevant pathways, which often comprise therapeutic targets and correctly prioritize candidate disease genes. This community challenge establishes benchmarks, tools and guidelines for molecular network analysis to study human disease biology (https://synapse.org/modulechallenge).}, keywords = {}, pubstate = {published}, tppubtype = {article} } Identification of modules in molecular networks is at the core of many current analysis methods in biomedical research. However, how well different approaches identify disease-relevant modules in different types of gene and protein networks remains poorly understood. We launched the “Disease Module Identification DREAM Challenge”, an open competition to comprehensively assess module identification methods across diverse protein-protein interaction, signaling, gene co-expression, homology, and cancer-gene networks. Predicted network modules were tested for association with complex traits and diseases using a unique collection of 180 genome-wide association studies (GWAS). Our critical assessment of 75 contributed module identification methods reveals novel top-performing algorithms, which recover complementary trait-associated modules. We find that most of these modules correspond to core disease-relevant pathways, which often comprise therapeutic targets and correctly prioritize candidate disease genes. This community challenge establishes benchmarks, tools and guidelines for molecular network analysis to study human disease biology (https://synapse.org/modulechallenge). |
Choobdar, S; Ahsen, ME; Crawford, J; Tomasoni, M; Fang, T; Lamparter, D; Lin, J; Hescott, B; Hu, X; Mercer, J; Natoli, T; Narayan, R; DREAM Module Identification Challenge Consortium; Subramanian, A; Zhang, JD; Stolovitzky, G; Kutalik, Z; Lage, K; Slonim, DK; Saez-Rodriguez, J; Cowen, LJ; Bergmann, S; Marbach, D Assessment of network module identification across complex diseases Nature Methods, 16 (9), pp. 843-852, 2019. @article{choobdar_2019, title = {Assessment of network module identification across complex diseases}, author = {Choobdar, S and Ahsen, ME and Crawford, J and Tomasoni, M and Fang, T and Lamparter, D and Lin, J and Hescott, B and Hu, X and Mercer, J and Natoli, T and Narayan, R and {DREAM Module Identification Challenge Consortium} and Subramanian, A and Zhang, JD and Stolovitzky, G and Kutalik, Z and Lage, K and Slonim, DK and Saez-Rodriguez, J and Cowen, LJ and Bergmann, S and Marbach, D}, url = {https://doi.org/10.1038/s41592-019-0509-5}, doi = {10.1038/s41592-019-0509-5}, year = {2019}, date = {2019-08-30}, journal = {Nature Methods}, volume = {16}, number = {9}, pages = {843-852}, abstract = {Many bioinformatics methods have been proposed for reducing the complexity of large gene or protein networks into relevant subnetworks or modules. Yet, how such methods compare to each other in terms of their ability to identify disease-relevant modules in different types of network remains poorly understood. We launched the ‘Disease Module Identification DREAM Challenge’, an open competition to comprehensively assess module identification methods across diverse protein–protein interaction, signaling, gene co-expression, homology and cancer-gene networks. Predicted network modules were tested for association with complex traits and diseases using a unique collection of 180 genome-wide association studies. Our robust assessment of 75 module identification methods reveals top-performing algorithms, which recover complementary trait-associated modules.
We find that most of these modules correspond to core disease-relevant pathways, which often comprise therapeutic targets. This community challenge establishes biologically interpretable benchmarks, tools and guidelines for molecular network analysis to study human disease biology.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Many bioinformatics methods have been proposed for reducing the complexity of large gene or protein networks into relevant subnetworks or modules. Yet, how such methods compare to each other in terms of their ability to identify disease-relevant modules in different types of network remains poorly understood. We launched the ‘Disease Module Identification DREAM Challenge’, an open competition to comprehensively assess module identification methods across diverse protein–protein interaction, signaling, gene co-expression, homology and cancer-gene networks. Predicted network modules were tested for association with complex traits and diseases using a unique collection of 180 genome-wide association studies. Our robust assessment of 75 module identification methods reveals top-performing algorithms, which recover complementary trait-associated modules. We find that most of these modules correspond to core disease-relevant pathways, which often comprise therapeutic targets. This community challenge establishes biologically interpretable benchmarks, tools and guidelines for molecular network analysis to study human disease biology. |
Fillinger, S; de la Garza, L; Peltzer, A; Kohlbacher, O; Nahnsen, S Challenges of big data integration in the life sciences Analytical and Bioanalytical Chemistry, 411 (26), pp. 6791–6800, 2019. @article{Fillinger.2019, title = {Challenges of big data integration in the life sciences}, author = {Fillinger, S and de la Garza, L and Peltzer, A and Kohlbacher, O and Nahnsen, S}, url = {https://link.springer.com/article/10.1007%2Fs00216-019-02074-9}, doi = {10.1007/s00216-019-02074-9}, year = {2019}, date = {2019-08-28}, journal = {Analytical and Bioanalytical Chemistry}, volume = {411}, number = {26}, pages = {6791–6800}, abstract = {Big data has been reported to be revolutionizing many areas of life, including science. It summarizes data that is unprecedentedly large, rapidly generated, heterogeneous, and hard to accurately interpret. This availability has also brought new challenges: How to properly annotate data to make it searchable? What are the legal and ethical hurdles when sharing data? How to store data securely, preventing loss and corruption? The life sciences are not the only disciplines that must align themselves with big data requirements to keep up with the latest developments. The large hadron collider, for instance, generates research data at a pace beyond any current biomedical research center. There are three recent major coinciding events that explain the emergence of big data in the context of research: the technological revolution for data generation, the development of tools for data analysis, and a conceptual change towards open science and data. The true potential of big data lies in pattern discovery in large datasets, as well as the formulation of new models and hypotheses. Confirmation of the existence of the Higgs boson, for instance, is one of the most recent triumphs of big data analysis in physics. Digital representations of biological systems have become more comprehensive.
This, in combination with advances in machine learning, creates exciting new research possibilities. In this paper, we review the state of big data in bioanalytical research and provide an overview of the guidelines for its proper usage.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Big data has been reported to be revolutionizing many areas of life, including science. It summarizes data that is unprecedentedly large, rapidly generated, heterogeneous, and hard to accurately interpret. This availability has also brought new challenges: How to properly annotate data to make it searchable? What are the legal and ethical hurdles when sharing data? How to store data securely, preventing loss and corruption? The life sciences are not the only disciplines that must align themselves with big data requirements to keep up with the latest developments. The large hadron collider, for instance, generates research data at a pace beyond any current biomedical research center. There are three recent major coinciding events that explain the emergence of big data in the context of research: the technological revolution for data generation, the development of tools for data analysis, and a conceptual change towards open science and data. The true potential of big data lies in pattern discovery in large datasets, as well as the formulation of new models and hypotheses. Confirmation of the existence of the Higgs boson, for instance, is one of the most recent triumphs of big data analysis in physics. Digital representations of biological systems have become more comprehensive. This, in combination with advances in machine learning, creates exciting new research possibilities. In this paper, we review the state of big data in bioanalytical research and provide an overview of the guidelines for its proper usage. |
van den Brand, J A J G; Dijkstra, T M H; Wetzels, J; Stengel, B; Metzger, M; Blankestijn, P J; Lambers Heerspink, H J; Gansevoort, R T Predicting kidney failure from longitudinal kidney function trajectory: a comparison of models PLOS One, 14 (5), 2019. @article{van_den_brand_2019, title = {Predicting kidney failure from longitudinal kidney function trajectory: a comparison of models}, author = {van den Brand, J.A.J.G. and Dijkstra, T.M.H. and Wetzels, J. and Stengel, B. and Metzger, M. and Blankestijn, P.J. and Lambers Heerspink, H.J. and Gansevoort, R.T.}, url = {https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0216559}, doi = {10.1371/journal.pone.0216559}, year = {2019}, date = {2019-05-09}, journal = {PLOS One}, volume = {14}, number = {5}, abstract = {Rationale & objective Early prediction of chronic kidney disease (CKD) progression to end-stage kidney disease (ESKD) currently use Cox models including baseline estimated glomerular filtration rate (eGFR) only. Alternative approaches include a Cox model that includes eGFR slope determined over a baseline period of time, a Cox model with time varying GFR, or a joint modeling approach. We studied if these more complex approaches may further improve ESKD prediction. Study design Prospective cohort. Setting & participants We re-used data from two CKD cohorts including patients with baseline eGFR >30ml/min per 1.73m2. MASTERPLAN (N = 505; 55 ESKD events) was used as development dataset, and NephroTest (N = 1385; 72 events) for validation. Predictors All models included age, sex, eGFR, and albuminuria, known prognostic markers for ESKD. Analytical approach We trained the models on the MASTERPLAN data and determined discrimination and calibration for each model at 2 years follow-up for a prediction horizon of 2 years in the NephroTest cohort. We benchmarked the predictive performance against the Kidney Failure Risk Equation (KFRE).
Results The C-statistics for the KFRE was 0.94 (95%CI 0.86 to 1.01). Performance was similar for the Cox model with time-varying eGFR (0.92 [0.84 to 0.97]), eGFR (0.95 [0.90 to 1.00]), and the joint model 0.91 [0.87 to 0.96]). The Cox model with eGFR slope showed the best calibration. Conclusion In the present studies, where the outcome was rare and follow-up data was highly complete, the joint models did not offer improvement in predictive performance over more traditional approaches such as a survival model with time-varying eGFR, or a model with eGFR slope. }, keywords = {}, pubstate = {published}, tppubtype = {article} } Rationale & objective Early prediction of chronic kidney disease (CKD) progression to end-stage kidney disease (ESKD) currently use Cox models including baseline estimated glomerular filtration rate (eGFR) only. Alternative approaches include a Cox model that includes eGFR slope determined over a baseline period of time, a Cox model with time varying GFR, or a joint modeling approach. We studied if these more complex approaches may further improve ESKD prediction. Study design Prospective cohort. Setting & participants We re-used data from two CKD cohorts including patients with baseline eGFR >30ml/min per 1.73m2. MASTERPLAN (N = 505; 55 ESKD events) was used as development dataset, and NephroTest (N = 1385; 72 events) for validation. Predictors All models included age, sex, eGFR, and albuminuria, known prognostic markers for ESKD. Analytical approach We trained the models on the MASTERPLAN data and determined discrimination and calibration for each model at 2 years follow-up for a prediction horizon of 2 years in the NephroTest cohort. We benchmarked the predictive performance against the Kidney Failure Risk Equation (KFRE). Results The C-statistics for the KFRE was 0.94 (95%CI 0.86 to 1.01). 
Performance was similar for the Cox model with time-varying eGFR (0.92 [0.84 to 0.97]), eGFR (0.95 [0.90 to 1.00]), and the joint model 0.91 [0.87 to 0.96]). The Cox model with eGFR slope showed the best calibration. Conclusion In the present studies, where the outcome was rare and follow-up data was highly complete, the joint models did not offer improvement in predictive performance over more traditional approaches such as a survival model with time-varying eGFR, or a model with eGFR slope. |
Bilich, Tatjana; Nelde, Annika; Bichmann, Leon; Roerden, Malte; Salih, Helmut R; Kowalewski, Daniel J; Schuster, Heiko; Tsou, Chih-Chiang; Marcu, Ana; Neidert, Marian C; Lübke, Maren; Rieth, Jonas; Schemionek, Mirle; Brümmendorf, Tim H; Vucinic, Vladan; Niederwieser, Dietger; Bauer, Jens; Märklin, Melanie; Peper, Janet K; Klein, Reinhild; Kanz, Lothar; Rammensee, Hans-Georg; Stevanovic, Stefan; Walz, Juliane S The HLA ligandome landscape of chronic myeloid leukemia delineates novel T-cell epitopes for immunotherapy Blood, 133 (6), pp. 550-565, 2019. @article{HLACML2019, title = {The HLA ligandome landscape of chronic myeloid leukemia delineates novel T-cell epitopes for immunotherapy}, author = {Tatjana Bilich and Annika Nelde and Leon Bichmann and Malte Roerden and Helmut R Salih and Daniel J Kowalewski and Heiko Schuster and Chih-Chiang Tsou and Ana Marcu and Marian C Neidert and Maren Lübke and Jonas Rieth and Mirle Schemionek and Tim H Brümmendorf and Vladan Vucinic and Dietger Niederwieser and Jens Bauer and Melanie Märklin and Janet K Peper and Reinhild Klein and Lothar Kanz and Hans-Georg Rammensee and Stefan Stevanovic and Juliane S Walz}, url = {http://www.bloodjournal.org/content/133/6/550?sso-checked=true}, doi = {10.1182/blood-2018-07-866830}, year = {2019}, date = {2019-02-07}, journal = {Blood}, volume = {133}, number = {6}, pages = {550-565}, abstract = {Anti-leukemia immunity plays an important role in disease control and maintenance of tyrosine kinase inhibitor (TKI)-free remission in chronic myeloid leukemia (CML). Thus, antigen-specific immunotherapy holds promise to strengthen immune control in CML, but requires the identification of CML-associated targets. In this study, we used a mass spectrometry-based approach to identify naturally presented, HLA class I- and class II-restricted peptides in primary CML samples.
Comparative HLA ligandome profiling using a comprehensive dataset of different hematological benign specimen and samples of CML patients in deep molecular remission delineated a panel of novel, frequently presented, CML-exclusive peptides. These non-mutated target antigens are of particular relevance since our extensive data mining approach suggests absence of naturally presented, BCR-ABL- and ABL-BCR-derived, HLA-restricted peptides and lack of frequent, tumor-exclusive presentation of known cancer/testis and leukemia-associated antigens. Functional characterization revealed spontaneous T-cell responses against the newly identified CML-associated peptides in CML patient samples and their ability to induce multifunctional and cytotoxic antigen-specific T cells de novo in samples of healthy volunteers and CML patients. These antigens are thus prime candidates for T cell-based immunotherapeutic approaches that may prolong TKI-free survival and even mediate cure of CML patients.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Anti-leukemia immunity plays an important role in disease control and maintenance of tyrosine kinase inhibitor (TKI)-free remission in chronic myeloid leukemia (CML). Thus, antigen-specific immunotherapy holds promise to strengthen immune control in CML, but requires the identification of CML-associated targets. In this study, we used a mass spectrometry-based approach to identify naturally presented, HLA class I- and class II-restricted peptides in primary CML samples. Comparative HLA ligandome profiling using a comprehensive dataset of different hematological benign specimen and samples of CML patients in deep molecular remission delineated a panel of novel, frequently presented, CML-exclusive peptides. 
These non-mutated target antigens are of particular relevance since our extensive data mining approach suggests absence of naturally presented, BCR-ABL- and ABL-BCR-derived, HLA-restricted peptides and lack of frequent, tumor-exclusive presentation of known cancer/testis and leukemia-associated antigens. Functional characterization revealed spontaneous T-cell responses against the newly identified CML-associated peptides in CML patient samples and their ability to induce multifunctional and cytotoxic antigen-specific T cells de novo in samples of healthy volunteers and CML patients. These antigens are thus prime candidates for T cell-based immunotherapeutic approaches that may prolong TKI-free survival and even mediate cure of CML patients. |
Löffler, Markus W; Mohr, Christopher; Bichmann, Leon; Freudenmann, Lena Katharina; Walzer, Mathias; Schroeder, Christopher W; Trautwein, Nico; Hilke, Franz J; Zinser, Raphael S; Mühlenbruch, Lena; Kowalewski, Daniel J; Schuster, Heiko; Sturm, Marc; Matthes, Jakob; Riess, Olaf; Czemmel, Stefan; Nahnsen, Sven; Königsrainer, Ingmar; Thiel, Karolin; Nadalin, Silvio; Beckert, Stefan; Bösmüller, Hans; Fend, Falko; Velic, Ana; Macek, Boris; Haen, Sebastian P; Buonaguro, Luigi; Kohlbacher, Oliver; Königsrainer, Alfred; Rammensee, Hans-Georg; The HepaVac Consortium Multi-omics discovery of exome-derived neoantigens in hepatocellular carcinoma Genome Med., Forthcoming. @article{LoefflerGenomeMed2019, title = {Multi-omics discovery of exome-derived neoantigens in hepatocellular carcinoma}, author = {Markus W Löffler and Christopher Mohr and Leon Bichmann and Lena Katharina Freudenmann and Mathias Walzer and Christopher W Schroeder and Nico Trautwein and Franz J Hilke and Raphael S Zinser and Lena Mühlenbruch and Daniel J Kowalewski and Heiko Schuster and Marc Sturm and Jakob Matthes and Olaf Riess and Stefan Czemmel and Sven Nahnsen and Ingmar Königsrainer and Karolin Thiel and Silvio Nadalin and Stefan Beckert and Hans Bösmüller and Falko Fend and Ana Velic and Boris Macek and Sebastian P Haen and Luigi Buonaguro and Oliver Kohlbacher and Alfred Königsrainer and Hans-Georg Rammensee and {The HepaVac Consortium}}, year = {2019}, date = {2019-02-04}, journal = {Genome Med.}, abstract = {Background: Although mutated HLA ligands are considered ideal cancer-specific immunotherapy targets, evidence for their presentation is lacking in hepatocellular carcinomas (HCC). Employing a unique multi-omics approach comprising a neoepitope identification pipeline, we assessed exome-derived mutations naturally presented as HLA class I ligands in HCC.
Methods: In-depth multi-omics analyses included whole exome and transcriptome sequencing to define individual patient-specific search spaces of neoepitope candidates. Evidence for the natural presentation of mutated HLA ligands was investigated through an in silico pipeline integrating proteome and HLA ligandome profiling data. Results: The approach was successfully validated in a state-of-the-art dataset from malignant melanoma and despite multi-omics evidence for mutations, mutated naturally presented HLA ligands remained elusive in HCC. An analysis of extensive cancer datasets confirmed fundamental differences in tumor mutational burden in HCC and melanoma, challenging the notion that exome-derived mutations contribute relevantly to the expectable neoepitope pool in malignancies with only few mutations. Conclusions: This study suggests that exome-derived mutated HLA ligands appear to be rarely presented in HCC, inter alia resulting from a low mutational burden as compared to other malignancies such as melanoma. Our results therefore demand widening the target scope for personalized immunotherapy beyond this limited range of mutated neoepitopes, particularly for malignancies with similar or lower mutational burden.}, keywords = {}, pubstate = {forthcoming}, tppubtype = {article} } Background: Although mutated HLA ligands are considered ideal cancer-specific immunotherapy targets, evidence for their presentation is lacking in hepatocellular carcinomas (HCC). Employing a unique multi-omics approach comprising a neoepitope identification pipeline, we assessed exome-derived mutations naturally presented as HLA class I ligands in HCC. Methods: In-depth multi-omics analyses included whole exome and transcriptome sequencing to define individual patient-specific search spaces of neoepitope candidates. Evidence for the natural presentation of mutated HLA ligands was investigated through an in silico pipeline integrating proteome and HLA ligandome profiling data. 
Results: The approach was successfully validated in a state-of-the-art dataset from malignant melanoma and despite multi-omics evidence for mutations, mutated naturally presented HLA ligands remained elusive in HCC. An analysis of extensive cancer datasets confirmed fundamental differences in tumor mutational burden in HCC and melanoma, challenging the notion that exome-derived mutations contribute relevantly to the expectable neoepitope pool in malignancies with only few mutations. Conclusions: This study suggests that exome-derived mutated HLA ligands appear to be rarely presented in HCC, inter alia resulting from a low mutational burden as compared to other malignancies such as melanoma. Our results therefore demand widening the target scope for personalized immunotherapy beyond this limited range of mutated neoepitopes, particularly for malignancies with similar or lower mutational burden. |
Ali, Muhammed; Foldvari, Zsofia; Giannakopoulou, Eirini; Böschen, Maxi-Lu; Strønen, Erlend; Yang, Weiwen; Toebes, Mireille; Schubert, Benjamin; Kohlbacher, Oliver; Schumacher, Ton N; Olweus, Johanna Induction Of Neoantigen Reactive T Cells From Healthy Donors Nat. Protocols, Forthcoming. @article{NeoantigensNatProt2019, title = {Induction Of Neoantigen Reactive T Cells From Healthy Donors}, author = {Muhammed Ali and Zsofia Foldvari and Eirini Giannakopoulou and Maxi-Lu Böschen and Erlend Strønen and Weiwen Yang and Mireille Toebes and Benjamin Schubert and Oliver Kohlbacher and Ton N Schumacher and Johanna Olweus}, url = {https://www.nature.com/articles/s41596-019-0170-6}, doi = {10.1038/s41596-019-0170-6}, year = {2019}, date = {2019-02-01}, journal = {Nat. Protocols}, abstract = {Identification of immunogenic neoantigens and their cognate T cells represent the most crucial and rate-limiting steps in the development of personalized cancer immunotherapies that are based on vaccination or on infusion of T cell receptor-engineered T cells. Recent advances in deep sequencing technologies and in silico prediction algorithms enable rapid identification of candidate neoepitopes. However, large scale validation of putative neoepitopes and isolation of reactive T cells is challenging due to limited availability of patient material and low frequencies of neoepitope-specific T cells. Here, we describe a standardized protocol for induction of neoepitope-reactive T cells from healthy donor T cell repertoires, unaffected by the potentially immunosuppressive environment of the tumor-bearing host. Monocyte-derived dendritic cells transfected with mRNA encoding candidate neoepitopes are utilized to prime autologous naïve CD8+ T cells. Antigen-specific T cells recognizing endogenously processed and presented epitopes are detected using peptide-MHC (pMHC) multimers.
Single multimer-positive T cells are sorted for identification of TCR sequences, preceded by an optional step that includes clonal expansion and functional characterization. The time required to identify neoepitope-specific T cells is 15 days, with an additional two to four weeks required for clonal expansion and downstream functional characterization. Identified neoepitopes and corresponding TCRs provide candidates for use in vaccination and TCR-based cancer immunotherapies, and data sets generated by this technology should be of value to improve algorithms to predict immunogenic neoantigens.}, keywords = {}, pubstate = {forthcoming}, tppubtype = {article} } Identification of immunogenic neoantigens and their cognate T cells represent the most crucial and rate-limiting steps in the development of personalized cancer immunotherapies that are based on vaccination or on infusion of T cell receptor-engineered T cells. Recent advances in deep sequencing technologies and in silico prediction algorithms enable rapid identification of candidate neoepitopes. However, large scale validation of putative neoepitopes and isolation of reactive T cells is challenging due to limited availability of patient material and low frequencies of neoepitope-specific T cells. Here, we describe a standardized protocol for induction of neoepitope-reactive T cells from healthy donor T cell repertoires, unaffected by the potentially immunosuppressive environment of the tumor-bearing host. Monocyte-derived dendritic cells transfected with mRNA encoding candidate neoepitopes are utilized to prime autologous naïve CD8+ T cells. Antigen-specific T cells recognizing endogenously processed and presented epitopes are detected using peptide-MHC (pMHC) multimers. Single multimer-positive T cells are sorted for identification of TCR sequences, preceded by an optional step that includes clonal expansion and functional characterization.
The time required to identify neoepitope-specific T cells is 15 days, with an additional two to four weeks required for clonal expansion and downstream functional characterization. Identified neoepitopes and corresponding TCRs provide candidates for use in vaccination and TCR-based cancer immunotherapies, and data sets generated by this technology should be of value to improve algorithms to predict immunogenic neoantigens. |
Alka, Oliver; Sachsenberg, Timo; Bichmann, Leon; Pfeuffer, Julianus; Weisser, Hendrik; Wein, Samuel; Netz, Eugen; Rurik, Marc; Kohlbacher, Oliver; Rost, Hannes OpenMS for open source analysis of mass spectrometric data PeerJ Preprints, 7 , pp. e27766v1, 2019, ISSN: 2167-9843. @article{10.7287/peerj.preprints.27766v1, title = {OpenMS for open source analysis of mass spectrometric data}, author = {Oliver Alka and Timo Sachsenberg and Leon Bichmann and Julianus Pfeuffer and Hendrik Weisser and Samuel Wein and Eugen Netz and Marc Rurik and Oliver Kohlbacher and Hannes Rost}, url = {https://doi.org/10.7287/peerj.preprints.27766v1}, doi = {10.7287/peerj.preprints.27766v1}, issn = {2167-9843}, year = {2019}, date = {2019-01-01}, journal = {PeerJ Preprints}, volume = {7}, pages = {e27766v1}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
Cain, Nicolas; Alka, Oliver; Segelke, Torben; von Wuthenau, Kristian; Kohlbacher, Oliver; Fischer, Markus Food fingerprinting: Mass spectrometric determination of the cocoa shell content (Theobroma cacao L.) in cocoa products by HPLC-QTOF-MS Food Chemistry, 298 , pp. 125013, 2019, ISSN: 0308-8146. @article{CAIN2019125013, title = {Food fingerprinting: Mass spectrometric determination of the cocoa shell content (Theobroma cacao L.) in cocoa products by HPLC-QTOF-MS}, author = {Nicolas Cain and Oliver Alka and Torben Segelke and Kristian von Wuthenau and Oliver Kohlbacher and Markus Fischer}, url = {http://www.sciencedirect.com/science/article/pii/S030881461931115X}, doi = {10.1016/j.foodchem.2019.125013}, issn = {0308-8146}, year = {2019}, date = {2019-01-01}, journal = {Food Chemistry}, volume = {298}, pages = {125013}, abstract = {The determination of cocoa shell content (Theobroma cacao L.) in cocoa products using a metabolomics approach was accomplished via high performance liquid chromatography quadrupole time-of-flight mass spectrometry (HPLC-QTOF-MS). The developed method was used to separately analyze the polar and non-polar metabolome of the cocoa testa (cocoa shell) and the cocoa cotyledons (cocoa nibs) of cocoa samples from 15 different geographic origins, harvest years, and varieties in positive and negative ion mode. Potential key metabolites were selected which are exclusively contained in the cocoa shell or with significant higher concentration in the cocoa shell than in the cocoa nibs. The pool of potential key metabolites was filtered by established selection criteria, such as temperature stability, fermentations stability, and independence from the geographic origin. Based on these key metabolites an inverse sparse partial least square regression (SPLS) was used for the prediction of the cocoa shell content.}, keywords = {}, pubstate = {published}, tppubtype = {article} } The determination of cocoa shell content (Theobroma cacao L.)
in cocoa products using a metabolomics approach was accomplished via high performance liquid chromatography quadrupole time-of-flight mass spectrometry (HPLC-QTOF-MS). The developed method was used to separately analyze the polar and non-polar metabolome of the cocoa testa (cocoa shell) and the cocoa cotyledons (cocoa nibs) of cocoa samples from 15 different geographic origins, harvest years, and varieties in positive and negative ion mode. Potential key metabolites were selected which are exclusively contained in the cocoa shell or with significant higher concentration in the cocoa shell than in the cocoa nibs. The pool of potential key metabolites was filtered by established selection criteria, such as temperature stability, fermentations stability, and independence from the geographic origin. Based on these key metabolites an inverse sparse partial least square regression (SPLS) was used for the prediction of the cocoa shell content. |
Licha, David; Vidali, Silvia; Aminzadeh-Gohari, Sepideh; Alka, Oliver; Breitkreuz, Leander; Kohlbacher, Oliver; Reischl, Roland J; Feichtinger, René G; Kofler, Barbara; Huber, Christian G Untargeted Metabolomics Reveals Molecular Effects of Ketogenic Diet on Healthy and Tumor Xenograft Mouse Models International Journal of Molecular Sciences, 20 (16), 2019, ISSN: 1422-0067. @article{ijms20163873, title = {Untargeted Metabolomics Reveals Molecular Effects of Ketogenic Diet on Healthy and Tumor Xenograft Mouse Models}, author = {David Licha and Silvia Vidali and Sepideh Aminzadeh-Gohari and Oliver Alka and Leander Breitkreuz and Oliver Kohlbacher and Roland J Reischl and René G Feichtinger and Barbara Kofler and Christian G Huber}, url = {https://www.mdpi.com/1422-0067/20/16/3873}, doi = {10.3390/ijms20163873}, issn = {1422-0067}, year = {2019}, date = {2019-01-01}, journal = {International Journal of Molecular Sciences}, volume = {20}, number = {16}, abstract = {The application of ketogenic diet (KD) (high fat/low carbohydrate/adequate protein) as an auxiliary cancer therapy is a field of growing attention. KD provides sufficient energy supply for healthy cells, while possibly impairing energy production in highly glycolytic tumor cells. Moreover, KD regulates insulin and tumor related growth factors (like insulin growth factor-1, IGF-1). In order to provide molecular evidence for the proposed additional inhibition of tumor growth when combining chemotherapy with KD, we applied untargeted quantitative metabolome analysis on a spontaneous breast cancer xenograft mouse model, using MDA-MB-468 cells. Healthy mice and mice bearing breast cancer xenografts and receiving cyclophosphamide chemotherapy were compared after treatment with control diet and KD. Metabolomic profiling was performed on plasma samples, applying high-performance liquid chromatography coupled to tandem mass spectrometry. 
Statistical analysis revealed metabolic fingerprints comprising numerous significantly regulated features in the group of mice bearing breast cancer. This fingerprint disappeared after treatment with KD, resulting in recovery to the metabolic status observed in healthy mice receiving control diet. Moreover, amino acid metabolism as well as fatty acid transport were found to be affected by both the tumor and the applied KD. Our results provide clear evidence of a significant molecular effect of adjuvant KD in the context of tumor growth inhibition and suggest additional mechanisms of tumor suppression beyond the proposed constrain in energy supply of tumor cells.}, keywords = {}, pubstate = {published}, tppubtype = {article} } The application of ketogenic diet (KD) (high fat/low carbohydrate/adequate protein) as an auxiliary cancer therapy is a field of growing attention. KD provides sufficient energy supply for healthy cells, while possibly impairing energy production in highly glycolytic tumor cells. Moreover, KD regulates insulin and tumor related growth factors (like insulin growth factor-1, IGF-1). In order to provide molecular evidence for the proposed additional inhibition of tumor growth when combining chemotherapy with KD, we applied untargeted quantitative metabolome analysis on a spontaneous breast cancer xenograft mouse model, using MDA-MB-468 cells. Healthy mice and mice bearing breast cancer xenografts and receiving cyclophosphamide chemotherapy were compared after treatment with control diet and KD. Metabolomic profiling was performed on plasma samples, applying high-performance liquid chromatography coupled to tandem mass spectrometry. Statistical analysis revealed metabolic fingerprints comprising numerous significantly regulated features in the group of mice bearing breast cancer. This fingerprint disappeared after treatment with KD, resulting in recovery to the metabolic status observed in healthy mice receiving control diet. 
Moreover, amino acid metabolism as well as fatty acid transport were found to be affected by both the tumor and the applied KD. Our results provide clear evidence of a significant molecular effect of adjuvant KD in the context of tumor growth inhibition and suggest additional mechanisms of tumor suppression beyond the proposed constrain in energy supply of tumor cells. |
Bichmann, Leon; Nelde, Annika; Ghosh, Michael; Heumos, Lukas; Mohr, Christopher; Peltzer, Alexander; Kuchenbecker, Leon; Sachsenberg, Timo; Walz, Juliane S; Stevanović, Stefan; Rammensee, Hans-Georg; Kohlbacher, Oliver MHCquant: Automated and Reproducible Data Analysis for Immunopeptidomics Journal of Proteome Research, 18 (11), pp. 3876-3884, 2019, (PMID: 31589052). @article{doi:10.1021/acs.jproteome.9b00313, title = {MHCquant: Automated and Reproducible Data Analysis for Immunopeptidomics}, author = {Leon Bichmann and Annika Nelde and Michael Ghosh and Lukas Heumos and Christopher Mohr and Alexander Peltzer and Leon Kuchenbecker and Timo Sachsenberg and Juliane S Walz and Stefan Stevanović and Hans-Georg Rammensee and Oliver Kohlbacher}, url = {https://doi.org/10.1021/acs.jproteome.9b00313}, doi = {10.1021/acs.jproteome.9b00313}, year = {2019}, date = {2019-01-01}, journal = {Journal of Proteome Research}, volume = {18}, number = {11}, pages = {3876-3884}, note = {PMID: 31589052}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
Sürün, Bilge; Schärfe, Charlotta P I; Divine, Mathew R; Heinrich, Julian; Toussaint, Nora C; Zimmermann, Lukas; Beha, Janina; Kohlbacher, Oliver ClinVAP: A reporting strategy from variants to therapeutic options Bioinformatics, 2019, ISSN: 1367-4803, (btz924). @article{10.1093/bioinformatics/btz924, title = {ClinVAP: A reporting strategy from variants to therapeutic options}, author = {Bilge Sürün and Charlotta P I Schärfe and Mathew R Divine and Julian Heinrich and Nora C Toussaint and Lukas Zimmermann and Janina Beha and Oliver Kohlbacher}, url = {https://doi.org/10.1093/bioinformatics/btz924}, doi = {10.1093/bioinformatics/btz924}, issn = {1367-4803}, year = {2019}, date = {2019-01-01}, journal = {Bioinformatics}, abstract = {Next-generation sequencing (NGS) has become routine in oncology and opens up new avenues of therapies, particularly in personalized oncology setting. An increasing number of cases also implies a need for a more robust, automated, and reproducible processing of long lists of variants for cancer diagnosis and therapy. While solutions for the large-scale analysis of somatic variants have been implemented, existing solutions often have issues with reproducibility, scalability, and interoperability. ClinVAP is an automated pipeline which annotates, filters, and prioritizes somatic single nucleotide variants (SNVs) provided in variant call format. It augments the variant information with documented or predicted clinical effect. These annotated variants are prioritized based on driver gene status and druggability. ClinVAP is available as a fully containerized, self-contained pipeline maximizing reproducibility and scalability allowing the analysis of larger scale data. 
The resulting JSON-based report is suited for automated downstream processing, but ClinVAP can also automatically render the information into a user-defined template to yield a human-readable report. ClinVAP is available at https://github.com/PersonalizedOncology/ClinVAP. Supplementary data are available at Bioinformatics online.}, note = {btz924}, keywords = {}, pubstate = {published}, tppubtype = {article} } Next-generation sequencing (NGS) has become routine in oncology and opens up new avenues of therapies, particularly in personalized oncology setting. An increasing number of cases also implies a need for a more robust, automated, and reproducible processing of long lists of variants for cancer diagnosis and therapy. While solutions for the large-scale analysis of somatic variants have been implemented, existing solutions often have issues with reproducibility, scalability, and interoperability. ClinVAP is an automated pipeline which annotates, filters, and prioritizes somatic single nucleotide variants (SNVs) provided in variant call format. It augments the variant information with documented or predicted clinical effect. These annotated variants are prioritized based on driver gene status and druggability. ClinVAP is available as a fully containerized, self-contained pipeline maximizing reproducibility and scalability allowing the analysis of larger scale data. The resulting JSON-based report is suited for automated downstream processing, but ClinVAP can also automatically render the information into a user-defined template to yield a human-readable report. ClinVAP is available at https://github.com/PersonalizedOncology/ClinVAP. Supplementary data are available at Bioinformatics online. |
Lübke, Maren; Spalt, Stefanie; Kowalewski, Daniel J; Zimmermann, Cosima; Bauersfeld, Liane; Nelde, Annika; Bichmann, Leon; Marcu, Ana; Peper, Janet Kerstin; Kohlbacher, Oliver; Walz, Juliane S; Le-Trilling, Vu Thuy Khanh; Hengel, Hartmut; Rammensee, Hans-Georg; Stevanović, Stefan; Halenius, Anne Identification of HCMV-derived T cell epitopes in seropositive individuals through viral deletion models The Journal of Experimental Medicine, 217 (3), 2019, ISSN: 0022-1007, (e20191164). @article{10.1084/jem.20191164, title = {Identification of HCMV-derived T cell epitopes in seropositive individuals through viral deletion models}, author = {Maren Lübke and Stefanie Spalt and Daniel J Kowalewski and Cosima Zimmermann and Liane Bauersfeld and Annika Nelde and Leon Bichmann and Ana Marcu and Janet Kerstin Peper and Oliver Kohlbacher and Juliane S Walz and Vu Thuy Khanh Le-Trilling and Hartmut Hengel and Hans-Georg Rammensee and Stefan Stevanović and Anne Halenius}, url = {https://doi.org/10.1084/jem.20191164}, doi = {10.1084/jem.20191164}, issn = {0022-1007}, year = {2019}, date = {2019-01-01}, journal = {The Journal of Experimental Medicine}, volume = {217}, number = {3}, abstract = {In healthy individuals, immune control of persistent human cytomegalovirus (HCMV) infection is effectively mediated by virus-specific CD4+ and CD8+ T cells. However, identifying the repertoire of T cell specificities for HCMV is hampered by the immense protein coding capacity of this betaherpesvirus. Here, we present a novel approach that employs HCMV deletion mutant viruses lacking HLA class I immunoevasins and allows direct identification of naturally presented HCMV-derived HLA ligands by mass spectrometry. We identified 368 unique HCMV-derived HLA class I ligands representing an unexpectedly broad panel of 123 HCMV antigens. Functional characterization revealed memory T cell responses in seropositive individuals for a substantial proportion (28%) of these novel peptides. 
Multiple HCMV-directed specificities in the memory T cell pool of single individuals indicate that physiologic anti-HCMV T cell responses are directed against a broad range of antigens. Thus, the unbiased identification of naturally presented viral epitopes enabled a comprehensive and systematic assessment of the physiological repertoire of anti-HCMV T cell specificities in seropositive individuals.}, note = {e20191164}, keywords = {}, pubstate = {published}, tppubtype = {article} } In healthy individuals, immune control of persistent human cytomegalovirus (HCMV) infection is effectively mediated by virus-specific CD4+ and CD8+ T cells. However, identifying the repertoire of T cell specificities for HCMV is hampered by the immense protein coding capacity of this betaherpesvirus. Here, we present a novel approach that employs HCMV deletion mutant viruses lacking HLA class I immunoevasins and allows direct identification of naturally presented HCMV-derived HLA ligands by mass spectrometry. We identified 368 unique HCMV-derived HLA class I ligands representing an unexpectedly broad panel of 123 HCMV antigens. Functional characterization revealed memory T cell responses in seropositive individuals for a substantial proportion (28%) of these novel peptides. Multiple HCMV-directed specificities in the memory T cell pool of single individuals indicate that physiologic anti-HCMV T cell responses are directed against a broad range of antigens. Thus, the unbiased identification of naturally presented viral epitopes enabled a comprehensive and systematic assessment of the physiological repertoire of anti-HCMV T cell specificities in seropositive individuals. |
Creydt, Marina; Hudzik, Daria; Rurik, Marc; Kohlbacher, Oliver; Fischer, Markus Food Authentication: Small Molecule Profiling as a Tool for the Geographic Discrimination of German White Asparagus J. Agric. Food Chem., 66 (50), pp. 13328-13339, 2018. @article{AsparagusLCMS-2018, title = {Food Authentication: Small Molecule Profiling as a Tool for the Geographic Discrimination of German White Asparagus}, author = {Marina Creydt and Daria Hudzik and Marc Rurik and Oliver Kohlbacher and Markus Fischer}, url = {https://pubs.acs.org/doi/10.1021/acs.jafc.8b05791}, year = {2018}, date = {2018-11-25}, journal = {J. Agric. Food Chem.}, volume = {66}, number = {50}, pages = {13328-13339}, abstract = {For the first time, a non-targeted metabolomics approach by means of ultra-performance liquid chromatography coupled to electrospray quadruple time-of-flight mass spectrometer (UPLC-qTOF-ESI-MS(/MS)) was chosen for the discrimination of geographical origins of white asparagus samples (Asparagus officinalis). Over a period of four harvesting periods (4 years) approximately 400 asparagus samples were measured. Initially, four different LC-MS methods were used to detect as many metabolites as possible and to assess which method is most suitable. The most relevant marker compounds were linked to the influence of different plant stress parameters and climate effects. Some of the samples were also analyzed by isotope-ratio mass spectrometry (IRMS) which is the current gold standard for the discrimination of the geographical origin of asparagus. 
In summary, the analysis of the metabolome was proved to be quite suitable to determine the geographical origin of asparagus and seems to provide better interpretable results than IRMS studies.}, keywords = {}, pubstate = {published}, tppubtype = {article} } For the first time, a non-targeted metabolomics approach by means of ultra-performance liquid chromatography coupled to electrospray quadruple time-of-flight mass spectrometer (UPLC-qTOF-ESI-MS(/MS)) was chosen for the discrimination of geographical origins of white asparagus samples (Asparagus officinalis). Over a period of four harvesting periods (4 years) approximately 400 asparagus samples were measured. Initially, four different LC-MS methods were used to detect as many metabolites as possible and to assess which method is most suitable. The most relevant marker compounds were linked to the influence of different plant stress parameters and climate effects. Some of the samples were also analyzed by isotope-ratio mass spectrometry (IRMS) which is the current gold standard for the discrimination of the geographical origin of asparagus. In summary, the analysis of the metabolome was proved to be quite suitable to determine the geographical origin of asparagus and seems to provide better interpretable results than IRMS studies. |
Rehm, Markus; Apweiler, Rolf; Beissbarth, Tim; Berthold, Michael; Blüthgen, Nils; Burmeister, Yvonne; Dammann, Olaf; Deutsch, Andreas; Feuerhake, Friederike; Franke, Andre; Hasenauer, Jan; Hoffmann, Steve; Höfer, Thomas; Jansen, Peter; Kaderali, Lars; Klingmüller, Ursula; Koch, Ina; Kohlbacher, Oliver; Kuepfer, Lars; Lammert, Frank; Maier, Dieter; Pfeifer, Nico; Radde, Nicole; Roeder, Ingo; Saez-Rodriguez, Julio; Sax, Ulrich; Schmeck, Bernd; Schuppert, Andreas; Seilheimer, Bernd; Theis, Fabian; Vera-Gonzáles, Julio; Wolkenhauer, Olaf Whither Systems Medicine? Exp. Mol. Med., 50 (3), pp. e453, 2018. @article{WhitherSysMed2017, title = {Whither Systems Medicine?}, author = {Markus Rehm and Rolf Apweiler and Tim Beissbarth and Michael Berthold and Nils Blüthgen and Yvonne Burmeister and Olaf Dammann and Andreas Deutsch and Friederike Feuerhake and Andre Franke and Jan Hasenauer and Steve Hoffmann and Thomas Höfer and Peter Jansen and Lars Kaderali and Ursula Klingmüller and Ina Koch and Oliver Kohlbacher and Lars Kuepfer and Frank Lammert and Dieter Maier and Nico Pfeifer and Nicole Radde and Ingo Roeder and Julio Saez-Rodriguez and Ulrich Sax and Bernd Schmeck and Andreas Schuppert and Bernd Seilheimer and Fabian Theis and Julio Vera-Gonzáles and Olaf Wolkenhauer}, url = {https://www.nature.com/articles/emm2017290}, year = {2018}, date = {2018-01-01}, journal = {Exp. Mol. Med.}, volume = {50}, number = {3}, pages = {e453}, abstract = {New technologies to generate, store and retrieve medical and research data are inducing a rapid change in clinical and translational research and health care. Systems medicine is the interdisciplinary approach wherein physicians and clinical investigators team up with experts from biology, biostatistics, informatics, mathematics and computational modeling to develop methods to use new and stored data to the benefit of the patient. 
We here provide a critical assessment of the opportunities and challenges arising out of systems approaches in medicine and from this provide a definition of what systems medicine entails. Based on our analysis of current developments in medicine and healthcare and associated research needs, we emphasize the role of systems medicine as a multilevel and multidisciplinary methodological framework for informed data acquisition and interdisciplinary data analysis to extract previously inaccessible knowledge for the benefit of patients.}, keywords = {}, pubstate = {published}, tppubtype = {article} } New technologies to generate, store and retrieve medical and research data are inducing a rapid change in clinical and translational research and health care. Systems medicine is the interdisciplinary approach wherein physicians and clinical investigators team up with experts from biology, biostatistics, informatics, mathematics and computational modeling to develop methods to use new and stored data to the benefit of the patient. We here provide a critical assessment of the opportunities and challenges arising out of systems approaches in medicine and from this provide a definition of what systems medicine entails. Based on our analysis of current developments in medicine and healthcare and associated research needs, we emphasize the role of systems medicine as a multilevel and multidisciplinary methodological framework for informed data acquisition and interdisciplinary data analysis to extract previously inaccessible knowledge for the benefit of patients. |
Schubert, Benjamin; Schärfe, Charlotta; Dönnes, Pierre; Hopf, Thomas; Marks, Debora; Kohlbacher, Oliver Population-specific design of de-immunized protein biotherapeutics PLoS Comput Biol, 14 (3), pp. e1005983, 2018. @article{DeImmunoPLoSCB2018, title = {Population-specific design of de-immunized protein biotherapeutics}, author = {Benjamin Schubert and Charlotta Schärfe and Pierre Dönnes and Thomas Hopf and Debora Marks and Oliver Kohlbacher}, url = {https://dx.doi.org/10.1371%2Fjournal.pcbi.1005983}, year = {2018}, date = {2018-01-01}, journal = {PLoS Comput Biol}, volume = {14}, number = {3}, pages = {e1005983}, abstract = {Immunogenicity is a major problem during the development of biotherapeutics since it can lead to rapid clearance of the drug and adverse reactions. The challenge for biotherapeutic design is therefore to identify mutants of the protein sequence that minimize immunogenicity in a target population whilst retaining pharmaceutical activity and protein function. Current approaches are moderately successful in designing sequences with reduced immunogenicity, but do not account for the varying frequencies of different human leucocyte antigen alleles in a specific population and in addition, since many designs are non-functional, require costly experimental post-screening. Here, we report a new method for de-immunization design using multi-objective combinatorial optimization. The method simultaneously optimizes the likelihood of a functional protein sequence at the same time as minimizing its immunogenicity tailored to a target population. We bypass the need for three-dimensional protein structure or molecular simulations to identify functional designs by automatically generating sequences using probabilistic models that have been used previously for mutation effect prediction and structure prediction. 
As proof-of-principle we designed sequences of the C2 domain of Factor VIII and tested them experimentally, resulting in a good correlation with the predicted immunogenicity of our model.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Immunogenicity is a major problem during the development of biotherapeutics since it can lead to rapid clearance of the drug and adverse reactions. The challenge for biotherapeutic design is therefore to identify mutants of the protein sequence that minimize immunogenicity in a target population whilst retaining pharmaceutical activity and protein function. Current approaches are moderately successful in designing sequences with reduced immunogenicity, but do not account for the varying frequencies of different human leucocyte antigen alleles in a specific population and in addition, since many designs are non-functional, require costly experimental post-screening. Here, we report a new method for de-immunization design using multi-objective combinatorial optimization. The method simultaneously optimizes the likelihood of a functional protein sequence at the same time as minimizing its immunogenicity tailored to a target population. We bypass the need for three-dimensional protein structure or molecular simulations to identify functional designs by automatically generating sequences using probabilistic models that have been used previously for mutation effect prediction and structure prediction. As proof-of-principle we designed sequences of the C2 domain of Factor VIII and tested them experimentally, resulting in a good correlation with the predicted immunogenicity of our model. |
Mohr, Christopher; Friedrich, Andreas; Wojnar, David; Kenar, Erhan; Polatkan, Aydin-Can; Codrea, Marius Cosmin; Czemmel, Stefan; Kohlbacher, Oliver; Nahnsen, Sven qPortal: A platform for data-driven biomedical research PLoS ONE, 13 (1), pp. e0191603, 2018. @article{qPortal-PLoSONE-2018, title = {qPortal: A platform for data-driven biomedical research}, author = {Christopher Mohr and Andreas Friedrich and David Wojnar and Erhan Kenar and Aydin-Can Polatkan and Marius Cosmin Codrea and Stefan Czemmel and Oliver Kohlbacher and Sven Nahnsen}, url = {http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0191603}, year = {2018}, date = {2018-01-01}, journal = {PLoS ONE}, volume = {13}, number = {1}, pages = {e0191603}, abstract = {Modern biomedical research aims at drawing biological conclusions from large, highly complex biological datasets. It has become common practice to make extensive use of high-throughput technologies that produce big amounts of heterogeneous data. In addition to the ever-improving accuracy, methods are getting faster and cheaper, resulting in a steadily increasing need for scalable data management and easily accessible means of analysis. We present qPortal, a platform providing users with an intuitive way to manage and analyze quantitative biological data. The backend leverages a variety of concepts and technologies, such as relational databases, data stores, data models and means of data transfer, as well as front-end solutions to give users access to data management and easy-to-use analysis options. Users are empowered to conduct their experiments from the experimental design to the visualization of their results through the platform. Here, we illustrate the feature-rich portal by simulating a biomedical study based on publically available data. We demonstrate the software’s strength in supporting the entire project life cycle. 
The software supports the project design and registration, empowers users to do all-digital project management and finally provides means to perform analysis. We compare our approach to Galaxy, one of the most widely used scientific workflow and analysis platforms in computational biology. Application of both systems to a small case study shows the differences between a data-driven approach (qPortal) and a workflow-driven approach (Galaxy). qPortal, a one-stop-shop solution for biomedical projects offers up-to-date analysis pipelines, quality control workflows, and visualization tools. Through intensive user interactions, appropriate data models have been developed. These models build the foundation of our biological data management system and provide possibilities to annotate data, query metadata for statistics and future re-analysis on high-performance computing systems via coupling of workflow management systems. Integration of project and data management as well as workflow resources in one place present clear advantages over existing solutions.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Modern biomedical research aims at drawing biological conclusions from large, highly complex biological datasets. It has become common practice to make extensive use of high-throughput technologies that produce big amounts of heterogeneous data. In addition to the ever-improving accuracy, methods are getting faster and cheaper, resulting in a steadily increasing need for scalable data management and easily accessible means of analysis. We present qPortal, a platform providing users with an intuitive way to manage and analyze quantitative biological data. The backend leverages a variety of concepts and technologies, such as relational databases, data stores, data models and means of data transfer, as well as front-end solutions to give users access to data management and easy-to-use analysis options. 
Users are empowered to conduct their experiments from the experimental design to the visualization of their results through the platform. Here, we illustrate the feature-rich portal by simulating a biomedical study based on publically available data. We demonstrate the software’s strength in supporting the entire project life cycle. The software supports the project design and registration, empowers users to do all-digital project management and finally provides means to perform analysis. We compare our approach to Galaxy, one of the most widely used scientific workflow and analysis platforms in computational biology. Application of both systems to a small case study shows the differences between a data-driven approach (qPortal) and a workflow-driven approach (Galaxy). qPortal, a one-stop-shop solution for biomedical projects offers up-to-date analysis pipelines, quality control workflows, and visualization tools. Through intensive user interactions, appropriate data models have been developed. These models build the foundation of our biological data management system and provide possibilities to annotate data, query metadata for statistics and future re-analysis on high-performance computing systems via coupling of workflow management systems. Integration of project and data management as well as workflow resources in one place present clear advantages over existing solutions. |
Choobdar, Sarvenaz; Ahsen, Mehmet E; Crawford, Jake; Tomasoni, Mattia; Lamparter, David; Lin, Junyuan; Hescott, Benjamin; Hu, Xiaozhe; Mercer, Johnathan; Natoli, Ted; Narayan, Rajiv; Subramanian, Aravind; Stolovitzky, Gustavo; Kutalik, Zoltán; Lage, Kasper; Slonim, Donna K; Saez-Rodriguez, Julio; Cowen, Lenore J; Bergmann, Sven; Marbach, Daniel Open Community Challenge Reveals Molecular Network Modules with Key Roles in Diseases bioRxiv, 2018. @article{Choobdar265553, title = {Open Community Challenge Reveals Molecular Network Modules with Key Roles in Diseases}, author = {Sarvenaz Choobdar and Mehmet E Ahsen and Jake Crawford and Mattia Tomasoni and David Lamparter and Junyuan Lin and Benjamin Hescott and Xiaozhe Hu and Johnathan Mercer and Ted Natoli and Rajiv Narayan and Aravind Subramanian and Gustavo Stolovitzky and Zoltán Kutalik and Kasper Lage and Donna K Slonim and Julio Saez-Rodriguez and Lenore J Cowen and Sven Bergmann and Daniel Marbach}, url = {https://www.biorxiv.org/content/early/2018/02/15/265553}, year = {2018}, date = {2018-01-01}, journal = {bioRxiv}, abstract = {Identification of modules in molecular networks is at the core of many current analysis methods in biomedical research. However, how well different approaches identify disease-relevant modules in different types of networks remains poorly understood. We launched the "Disease Module Identification DREAM Challenge", an open competition to comprehensively assess module identification methods across diverse gene, protein and signaling networks. Predicted network modules were tested for association with complex traits and diseases using a unique collection of 180 genome-wide association studies (GWAS). While a number of approaches were successful in terms of discovering complementary trait-associated modules, consensus predictions derived from the challenge submissions performed best. 
We find that most of these modules correspond to core disease-relevant pathways, which often comprise therapeutic targets and correctly prioritize candidate disease genes. This community challenge establishes benchmarks, tools and guidelines for molecular network analysis to study human disease biology (https://synapse.org/modulechallenge).}, keywords = {}, pubstate = {published}, tppubtype = {article} } Identification of modules in molecular networks is at the core of many current analysis methods in biomedical research. However, how well different approaches identify disease-relevant modules in different types of networks remains poorly understood. We launched the "Disease Module Identification DREAM Challenge", an open competition to comprehensively assess module identification methods across diverse gene, protein and signaling networks. Predicted network modules were tested for association with complex traits and diseases using a unique collection of 180 genome-wide association studies (GWAS). While a number of approaches were successful in terms of discovering complementary trait-associated modules, consensus predictions derived from the challenge submissions performed best. We find that most of these modules correspond to core disease-relevant pathways, which often comprise therapeutic targets and correctly prioritize candidate disease genes. This community challenge establishes benchmarks, tools and guidelines for molecular network analysis to study human disease biology (https://synapse.org/modulechallenge). |
Brunk, Elizabeth; Sahoo, Swagatika; Zielinski, Daniel C; Altunkaya, Ali; Dräger, Andreas; Mih, Nathan; Gatto, Francesco; Nilsson, Avlant; Gonzalez, German Andres Preciat; Aurich, Maike Kathrin; Prlić, Andreas; Sastry, Anand; Danielsdottir, Anna D; Heinken, Almut; Noronha, Alberto; Rose, Peter W; Burley, Stephen K; Fleming, Ronan M T; Nielsen, Jens; Thiele, Ines; Palsson, Bernhard O Recon3D enables a three-dimensional view of gene variation in human metabolism Nature Biotechnology, 2018. @article{articlereference.2018-02-21.3019096605, title = {Recon3D enables a three-dimensional view of gene variation in human metabolism}, author = {Elizabeth Brunk and Swagatika Sahoo and Daniel C Zielinski and Ali Altunkaya and Andreas Dräger and Nathan Mih and Francesco Gatto and Avlant Nilsson and German Andres Preciat Gonzalez and Maike Kathrin Aurich and Andreas Prlić and Anand Sastry and Anna D Danielsdottir and Almut Heinken and Alberto Noronha and Peter W Rose and Stephen K Burley and Ronan M T Fleming and Jens Nielsen and Ines Thiele and Bernhard O Palsson}, url = {https://www.nature.com/articles/nbt.4072}, year = {2018}, date = {2018-01-01}, journal = {Nature Biotechnology}, abstract = {Genome-scale network reconstructions have helped uncover the molecular basis of metabolism. Here we present Recon3D, a computational resource that includes three-dimensional (3D) metabolite and protein structure data and enables integrated analyses of metabolic functions in humans. We use Recon3D to functionally characterize mutations associated with disease, and identify metabolic response signatures that are caused by exposure to certain drugs. Recon3D represents the most comprehensive human metabolic network model to date, accounting for 3,288 open reading frames (representing 17% of functionally annotated human genes), 13,543 metabolic reactions involving 4,140 unique metabolites, and 12,890 protein structures. 
These data provide a unique resource for investigating molecular mechanisms of human metabolism. Recon3D is available at http://vmh.life .}, keywords = {}, pubstate = {published}, tppubtype = {article} } Genome-scale network reconstructions have helped uncover the molecular basis of metabolism. Here we present Recon3D, a computational resource that includes three-dimensional (3D) metabolite and protein structure data and enables integrated analyses of metabolic functions in humans. We use Recon3D to functionally characterize mutations associated with disease, and identify metabolic response signatures that are caused by exposure to certain drugs. Recon3D represents the most comprehensive human metabolic network model to date, accounting for 3,288 open reading frames (representing 17% of functionally annotated human genes), 13,543 metabolic reactions involving 4,140 unique metabolites, and 12,890 protein structures. These data provide a unique resource for investigating molecular mechanisms of human metabolism. Recon3D is available at http://vmh.life . |
Thijssen, Bram; Dijkstra, Tjeerd M H; Heskes, Tom; Wessels, Lodewyk F A Bayesian data integration for quantifying the contribution of diverse measurements to parameter estimates Bioinformatics, 34 (5), pp. 803-811, 2018. @article{articlereference.2018-03-06.7162030647, title = {Bayesian data integration for quantifying the contribution of diverse measurements to parameter estimates}, author = {Bram Thijssen and Tjeerd M H Dijkstra and Tom Heskes and Lodewyk F A Wessels}, url = {https://academic.oup.com/bioinformatics/article/34/5/803/4563568}, year = {2018}, date = {2018-01-01}, journal = {Bioinformatics}, volume = {34}, number = {5}, pages = {803-811}, abstract = {Motivation: Computational models in biology are frequently underdetermined, due to limits in our capacity to measure biological systems. In particular, mechanistic models often contain parameters whose values are not constrained by a single type of measurement. It may be possible to achieve better model determination by combining the information contained in different types of measurements. Bayesian statistics provides a convenient framework for this, allowing a quantification of the reduction in uncertainty with each additional measurement type. We wished to explore whether such integration is feasible and whether it can allow computational models to be more accurately determined. Results: We created an ordinary differential equation model of cell cycle regulation in budding yeast and integrated data from 13 different studies covering different experimental techniques. We found that for some parameters, a single type of measurement, relative time course mRNA expression, is sufficient to constrain them. Other parameters, however, were only constrained when two types of measurements were combined, namely relative time course and absolute transcript concentration. 
Comparing the estimates to measurements from three additional, independent studies, we found that the degradation and transcription rates indeed matched the model predictions in order of magnitude. The predicted translation rate was incorrect however, thus revealing a deficiency in the model. Since this parameter was not constrained by any of the measurement types separately, it was only possible to falsify the model when integrating multiple types of measurements. In conclusion, this study shows that integrating multiple measurement types can allow models to be more accurately determined. Availability and implementation: The models and files required for running the inference are included in the Supplementary information.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Motivation: Computational models in biology are frequently underdetermined, due to limits in our capacity to measure biological systems. In particular, mechanistic models often contain parameters whose values are not constrained by a single type of measurement. It may be possible to achieve better model determination by combining the information contained in different types of measurements. Bayesian statistics provides a convenient framework for this, allowing a quantification of the reduction in uncertainty with each additional measurement type. We wished to explore whether such integration is feasible and whether it can allow computational models to be more accurately determined. Results: We created an ordinary differential equation model of cell cycle regulation in budding yeast and integrated data from 13 different studies covering different experimental techniques. We found that for some parameters, a single type of measurement, relative time course mRNA expression, is sufficient to constrain them. Other parameters, however, were only constrained when two types of measurements were combined, namely relative time course and absolute transcript concentration. 
Comparing the estimates to measurements from three additional, independent studies, we found that the degradation and transcription rates indeed matched the model predictions in order of magnitude. The predicted translation rate was incorrect however, thus revealing a deficiency in the model. Since this parameter was not constrained by any of the measurement types separately, it was only possible to falsify the model when integrating multiple types of measurements. In conclusion, this study shows that integrating multiple measurement types can allow models to be more accurately determined. Availability and implementation: The models and files required for running the inference are included in the Supplementary information. |
Lederer, Simone; Dijkstra, Tjeerd M H; Heskes, Tom Additive Dose Response Models: Explicit Formulation and the Loewe Additivity Consistency Condition Frontiers in Pharmacology, 9 (31), 2018. @article{articlereference.2018-03-06.2759999135, title = {Additive Dose Response Models: Explicit Formulation and the Loewe Additivity Consistency Condition}, author = {Simone Lederer and Tjeerd M H Dijkstra and Tom Heskes}, url = {https://www.frontiersin.org/articles/10.3389/fphar.2018.00031/full}, year = {2018}, date = {2018-01-01}, journal = {Frontiers in Pharmacology}, volume = {9}, number = {31}, abstract = {High-throughput techniques allow for massive screening of drug combinations. To find combinations that exhibit an interaction effect, one filters for promising compound combinations by comparing to a response without interaction. A common principle for no interaction is Loewe Additivity which is based on the assumption that no compound interacts with itself and that two doses from different compounds having the same effect are equivalent. It then should not matter whether a component is replaced by the other or vice versa. We call this assumption the Loewe Additivity Consistency Condition (LACC). We derive explicit and implicit null reference models from the Loewe Additivity principle that are equivalent when the LACC holds. Of these two formulations, the implicit formulation is the known General Isobole Equation (Loewe, 1928), whereas the explicit one is the novel contribution. The LACC is violated in a significant number of cases. In this scenario the models make different predictions. We analyze two data sets of drug screening that are non-interactive (Cokol et al., 2011; Yadav et al., 2015) and show that the LACC is mostly violated and Loewe Additivity not defined. Further, we compare the measurements of the non-interactive cases of both data sets to the theoretical null reference models in terms of bias and mean squared error. 
We demonstrate that the explicit formulation of the null reference model leads to smaller mean squared errors than the implicit one and is much faster to compute.}, keywords = {}, pubstate = {published}, tppubtype = {article} } High-throughput techniques allow for massive screening of drug combinations. To find combinations that exhibit an interaction effect, one filters for promising compound combinations by comparing to a response without interaction. A common principle for no interaction is Loewe Additivity which is based on the assumption that no compound interacts with itself and that two doses from different compounds having the same effect are equivalent. It then should not matter whether a component is replaced by the other or vice versa. We call this assumption the Loewe Additivity Consistency Condition (LACC). We derive explicit and implicit null reference models from the Loewe Additivity principle that are equivalent when the LACC holds. Of these two formulations, the implicit formulation is the known General Isobole Equation (Loewe, 1928), whereas the explicit one is the novel contribution. The LACC is violated in a significant number of cases. In this scenario the models make different predictions. We analyze two data sets of drug screening that are non-interactive (Cokol et al., 2011; Yadav et al., 2015) and show that the LACC is mostly violated and Loewe Additivity not defined. Further, we compare the measurements of the non-interactive cases of both data sets to the theoretical null reference models in terms of bias and mean squared error. We demonstrate that the explicit formulation of the null reference model leads to smaller mean squared errors than the implicit one and is much faster to compute. |
Löffler, Markus W; Kowalewski, Daniel J; Backert, Linus; Bernhardt, Jörg; Adam, Patrick; Schuster, Heiko; Dengler, Florian; Backes, Daniel; Kopp, Hans-Georg; Beckert, Stefan; Wagner, Silvia; Königsrainer, Ingmar; Kohlbacher, Oliver; Kanz, Lothar; Königsrainer, Alfred; Rammensee, Hans-Georg; Stevanovic, Stefan; Haen, Sebastian P Mapping the HLA ligandome of Colorectal Cancer Reveals an Imprint of Malignant Cell Transformation Cancer Res., 78 (16), pp. 4627-4641, 2018. @article{LoefflerCanRes2018, title = {Mapping the HLA ligandome of Colorectal Cancer Reveals an Imprint of Malignant Cell Transformation}, author = {Markus W Löffler and Daniel J Kowalewski and Linus Backert and Jörg Bernhardt and Patrick Adam and Heiko Schuster and Florian Dengler and Daniel Backes and Hans-Georg Kopp and Stefan Beckert and Silvia Wagner and Ingmar Königsrainer and Oliver Kohlbacher and Lothar Kanz and Alfred Königsrainer and Hans-Georg Rammensee and Stefan Stevanovic and Sebastian P Haen}, url = {https://doi.org/10.1158/0008-5472.CAN-17-1745}, year = {2018}, date = {2018-01-01}, journal = {Cancer Res.}, volume = {78}, number = {16}, pages = {4627-4641}, abstract = {Immune cell infiltrates have proven highly relevant for colorectal carcinoma (CRC) prognosis, making CRC a promising candidate for immunotherapy. Since tumors interact with the immune system via HLA-presented peptide ligands, exact knowledge of the peptidome constitution is fundamental for understanding this relationship. Here we comprehensively describe the naturally presented HLA-ligandome of CRC and corresponding non-malignant colon (NMC) tissue. Mass spectrometry identified 35,367 and 28,132 HLA-class I ligands on CRC and NMC, attributable to 7,684 and 6,312 distinct source proteins, respectively. 
Cancer-exclusive peptides were assessed on source protein level using Kyoto Encyclopedia of Genes and Genomes (KEGG) and protein analysis through evolutionary relationships (PANTHER), revealing pathognomonic CRC-associated pathways including Wnt, TGF-β, PI3K, p53, and RTK-RAS. Relative quantitation of peptide presentation on paired CRC and NMC tissue further identified source proteins from cancer- and infection-associated pathways to be over-represented merely within the CRC ligandome. From the pool of tumor-exclusive peptides, a selected HLA-ligand subset was assessed for immunogenicity, with the majority exhibiting an existing T cell repertoire. Overall, these data show that the HLA-ligandome reflects cancer-associated pathways implicated in CRC oncogenesis, suggesting that alterations in tumor cell metabolism could result in cancer-specific, albeit not mutation-derived tumor-antigens. Hence, a defined pool of unique tumor peptides, attributable to complex cellular alterations that are exclusive to malignant cells might comprise promising candidates for immunotherapeutic applications.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Immune cell infiltrates have proven highly relevant for colorectal carcinoma (CRC) prognosis, making CRC a promising candidate for immunotherapy. Since tumors interact with the immune system via HLA-presented peptide ligands, exact knowledge of the peptidome constitution is fundamental for understanding this relationship. Here we comprehensively describe the naturally presented HLA-ligandome of CRC and corresponding non-malignant colon (NMC) tissue. Mass spectrometry identified 35,367 and 28,132 HLA-class I ligands on CRC and NMC, attributable to 7,684 and 6,312 distinct source proteins, respectively. 
Cancer-exclusive peptides were assessed on source protein level using Kyoto Encyclopedia of Genes and Genomes (KEGG) and protein analysis through evolutionary relationships (PANTHER), revealing pathognomonic CRC-associated pathways including Wnt, TGF-β, PI3K, p53, and RTK-RAS. Relative quantitation of peptide presentation on paired CRC and NMC tissue further identified source proteins from cancer- and infection-associated pathways to be over-represented merely within the CRC ligandome. From the pool of tumor-exclusive peptides, a selected HLA-ligand subset was assessed for immunogenicity, with the majority exhibiting an existing T cell repertoire. Overall, these data show that the HLA-ligandome reflects cancer-associated pathways implicated in CRC oncogenesis, suggesting that alterations in tumor cell metabolism could result in cancer-specific, albeit not mutation-derived tumor-antigens. Hence, a defined pool of unique tumor peptides, attributable to complex cellular alterations that are exclusive to malignant cells might comprise promising candidates for immunotherapeutic applications. |
Kahles, André; Lehmann, Kjong-Van; Toussaint, Nora C; Hüser, Matthias; Stark, Stefan; Sachsenberg, Timo; Stegle, Oliver; Kohlbacher, Oliver; Sander, Chris; TCGA PanCanAtlas Network; Rätsch, Gunnar Comprehensive Analysis of Alternative Splicing Across Tumors from 8,705 Patients Cancer Cell, 34 (2), pp. 211-224, 2018. @article{SplicedEpitopesCancerCell2018, title = {Comprehensive Analysis of Alternative Splicing Across Tumors from 8,705 Patients}, author = {André Kahles and Kjong-Van Lehmann and Nora C Toussaint and Matthias Hüser and Stefan Stark and Timo Sachsenberg and Oliver Stegle and Oliver Kohlbacher and Chris Sander and {TCGA PanCanAtlas Network} and Gunnar Rätsch}, url = {https://www.sciencedirect.com/science/article/pii/S1535610818303064?via%3Dihub}, year = {2018}, date = {2018-01-01}, journal = {Cancer Cell}, volume = {34}, number = {2}, pages = {211-224}, abstract = {Our comprehensive analysis of alternative splicing across 32 The Cancer Genome Atlas cancer types from 8,705 patients detects alternative splicing events and tumor variants by reanalyzing RNA and whole-exome sequencing data. Tumors have up to 30% more alternative splicing events than normal samples. Association analysis of somatic variants with alternative splicing events confirmed known trans associations with variants in SF3B1 and U2AF1 and identified additional trans-acting variants (e.g., TADA1, PPP2R1A). Many tumors have thousands of alternative splicing events not detectable in normal samples; on average, we identified ≈930 exon-exon junctions (“neojunctions”) in tumors not typically found in GTEx normals. 
From Clinical Proteomic Tumor Analysis Consortium data available for breast and ovarian tumor samples, we confirmed ≈1.7 neojunction- and ≈0.6 single nucleotide variant-derived peptides per tumor sample that are also predicted major histocompatibility complex-I binders (“putative neoantigens”).}, keywords = {}, pubstate = {published}, tppubtype = {article} } Our comprehensive analysis of alternative splicing across 32 The Cancer Genome Atlas cancer types from 8,705 patients detects alternative splicing events and tumor variants by reanalyzing RNA and whole-exome sequencing data. Tumors have up to 30% more alternative splicing events than normal samples. Association analysis of somatic variants with alternative splicing events confirmed known trans associations with variants in SF3B1 and U2AF1 and identified additional trans-acting variants (e.g., TADA1, PPP2R1A). Many tumors have thousands of alternative splicing events not detectable in normal samples; on average, we identified ≈930 exon-exon junctions (“neojunctions”) in tumors not typically found in GTEx normals. From Clinical Proteomic Tumor Analysis Consortium data available for breast and ovarian tumor samples, we confirmed ≈1.7 neojunction- and ≈0.6 single nucleotide variant-derived peptides per tumor sample that are also predicted major histocompatibility complex-I binders (“putative neoantigens”). |
Fröhlich, Holger; Balling, Rudi; Beerenwinkel, Niko; Kohlbacher, Oliver; Kumar, Santosh; Lengauer, Thomas; Maathuis, Marloes; Moreau, Yves; Murphy, Susan; Przytycka, Teresa; Röst, Hannes; Rebhan, Michael; Schuppert, Andreas; Schwab, Matthias; Spang, Rainer; Stekhoven, Daniel; Sun, Jimeng; Ziemek, Daniel; Zupan, Blaz From Hype to Reality: Data Science enabling Personalized Medicine BMC Medicine, 16 , pp. 150, 2018. @article{PM_Dagstuhl_2018, title = {From Hype to Reality: Data Science enabling Personalized Medicine}, author = {Holger Fröhlich and Rudi Balling and Niko Beerenwinkel and Oliver Kohlbacher and Santosh Kumar and Thomas Lengauer and Marloes Maathuis and Yves Moreau and Susan Murphy and Teresa Przytycka and Hannes Röst and Michael Rebhan and Andreas Schuppert and Matthias Schwab and Rainer Spang and Daniel Stekhoven and Jimeng Sun and Daniel Ziemek and Blaz Zupan}, url = {https://bmcmedicine.biomedcentral.com/articles/10.1186/s12916-018-1122-7}, year = {2018}, date = {2018-01-01}, journal = {BMC Medicine}, volume = {16}, pages = {150}, abstract = {Background: Personalized, precision, P4, or stratified medicine is understood as a medical approach in which patients are stratified based on their disease subtype, risk, prognosis, or treatment response using specialized diagnostic tests. The key idea is to base medical decisions on individual patient characteristics, including molecular and behavioral biomarkers, rather than on population averages. Personalized medicine is deeply connected to and dependent on data science, specifically machine learning (often named Artificial Intelligence in the mainstream media). While during recent years there has been a lot of enthusiasm about the potential of ‘big data’ and machine learning-based solutions, there exist only few examples that impact current clinical practice. 
The lack of impact on clinical practice can largely be attributed to insufficient performance of predictive models, difficulties to interpret complex model predictions, and lack of validation via prospective clinical trials that demonstrate a clear benefit compared to the standard of care. In this paper, we review the potential of state-of-the-art data science approaches for personalized medicine, discuss open challenges, and highlight directions that may help to overcome them in the future. Conclusions: There is a need for an interdisciplinary effort, including data scientists, physicians, patient advocates, regulatory agencies, and health insurance organizations. Partially unrealistic expectations and concerns about data science-based solutions need to be better managed. In parallel, computational methods must advance more to provide direct benefit to clinical practice.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Background: Personalized, precision, P4, or stratified medicine is understood as a medical approach in which patients are stratified based on their disease subtype, risk, prognosis, or treatment response using specialized diagnostic tests. The key idea is to base medical decisions on individual patient characteristics, including molecular and behavioral biomarkers, rather than on population averages. Personalized medicine is deeply connected to and dependent on data science, specifically machine learning (often named Artificial Intelligence in the mainstream media). While during recent years there has been a lot of enthusiasm about the potential of ‘big data’ and machine learning-based solutions, there exist only few examples that impact current clinical practice. 
The lack of impact on clinical practice can largely be attributed to insufficient performance of predictive models, difficulties to interpret complex model predictions, and lack of validation via prospective clinical trials that demonstrate a clear benefit compared to the standard of care. In this paper, we review the potential of state-of-the-art data science approaches for personalized medicine, discuss open challenges, and highlight directions that may help to overcome them in the future. Conclusions: There is a need for an interdisciplinary effort, including data scientists, physicians, patient advocates, regulatory agencies, and health insurance organizations. Partially unrealistic expectations and concerns about data science-based solutions need to be better managed. In parallel, computational methods must advance more to provide direct benefit to clinical practice. |
Prasser, Fabian; Kohlbacher, Oliver; Mansmann, Ulrich; Bauer, Bernhard; Kuhn, Klaus A Data Integration for Future Medicine (DIFUTURE) - An Architectural and Methodological Overview Methods Inf Med, 57 (S01), pp. e57-e65, 2018. @article{DIFUTURE_Methods_2018, title = {Data Integration for Future Medicine (DIFUTURE) - An Architectural and Methodological Overview}, author = {Fabian Prasser and Oliver Kohlbacher and Ulrich Mansmann and Bernhard Bauer and Klaus A Kuhn}, url = {https://www.thieme-connect.de/products/ejournals/abstract/10.3414/ME17-02-0022}, year = {2018}, date = {2018-01-01}, journal = {Methods Inf Med}, volume = {57}, number = {S01}, pages = {e57-e65}, abstract = {Introduction: This article is part of the Focus Theme of Methods of Information in Medicine on the German Medical Informatics Initiative. Future medicine will be predictive, preventive, personalized, participatory and digital. Data and knowledge at comprehensive depth and breadth need to be available for research and at the point of care as a basis for targeted diagnosis and therapy. Data integration and data sharing will be essential to achieve these goals. For this purpose, the consortium Data Integration for Future Medicine (DIFUTURE) will establish Data Integration Centers (DICs) at university medical centers. Objectives: The infrastructure envisioned by DIFUTURE will provide researchers with cross-site access to data and support physicians by innovative views on integrated data as well as by decision support components for personalized treatments. The aim of our use cases is to show that this accelerates innovation, improves health care processes and results in tangible benefits for our patients. To realize our vision, numerous challenges have to be addressed. The objective of this article is to describe our concepts and solutions on the technical and the organizational level with a specific focus on data integration and sharing. 
Governance and Policies: Data sharing implies significant security and privacy challenges. Therefore, state-of-the-art data protection, modern IT security concepts and patient trust play a central role in our approach. We have established governance structures and policies safeguarding data use and sharing by technical and organizational measures providing highest levels of data protection. One of our central policies is that adequate methods of data sharing for each use case and project will be selected based on rigorous risk and threat analyses. Interdisciplinary groups have been installed in order to manage change. Architectural Framework and Methodology: The DIFUTURE Data Integration Centers will implement a three-step approach to integrating, harmonizing and sharing structured, unstructured and omics data as well as images from clinical and research environments. First, data is imported and technically harmonized using common data and interface standards (including various IHE profiles, DICOM and HL7 FHIR). Second, data is preprocessed, transformed, harmonized and enriched within a staging and working environment. Third, data is imported into common analytics platforms and data models (including i2b2 and tranSMART) and made accessible in a form compliant with the interoperability requirements defined on the national level. Secure data access and sharing will be implemented with innovative combinations of privacy-enhancing technologies (safe data, safe settings, safe outputs) and methods of distributed computing. Use Cases: From the perspective of health care and medical research, our approach is disease-oriented and use-case driven, i.e. following the needs of physicians and researchers and aiming at measurable benefits for our patients. We will work on early diagnosis, tailored therapies and therapy decision tools with focuses on neurology, oncology and further disease entities. 
Our early use cases will serve as blueprints for the following ones, verifying that the infrastructure developed by DIFUTURE is able to support a variety of application scenarios. Discussion: Own previous work, the use of internationally successful open source systems and a state-of-the-art software architecture are cornerstones of our approach. In the conceptual phase of the initiative, we have already prototypically implemented and tested the most important components of our architecture.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Introduction: This article is part of the Focus Theme of Methods of Information in Medicine on the German Medical Informatics Initiative. Future medicine will be predictive, preventive, personalized, participatory and digital. Data and knowledge at comprehensive depth and breadth need to be available for research and at the point of care as a basis for targeted diagnosis and therapy. Data integration and data sharing will be essential to achieve these goals. For this purpose, the consortium Data Integration for Future Medicine (DIFUTURE) will establish Data Integration Centers (DICs) at university medical centers. Objectives: The infrastructure envisioned by DIFUTURE will provide researchers with cross-site access to data and support physicians by innovative views on integrated data as well as by decision support components for personalized treatments. The aim of our use cases is to show that this accelerates innovation, improves health care processes and results in tangible benefits for our patients. To realize our vision, numerous challenges have to be addressed. The objective of this article is to describe our concepts and solutions on the technical and the organizational level with a specific focus on data integration and sharing. Governance and Policies: Data sharing implies significant security and privacy challenges. 
Therefore, state-of-the-art data protection, modern IT security concepts and patient trust play a central role in our approach. We have established governance structures and policies safeguarding data use and sharing by technical and organizational measures providing highest levels of data protection. One of our central policies is that adequate methods of data sharing for each use case and project will be selected based on rigorous risk and threat analyses. Interdisciplinary groups have been installed in order to manage change. Architectural Framework and Methodology: The DIFUTURE Data Integration Centers will implement a three-step approach to integrating, harmonizing and sharing structured, unstructured and omics data as well as images from clinical and research environments. First, data is imported and technically harmonized using common data and interface standards (including various IHE profiles, DICOM and HL7 FHIR). Second, data is preprocessed, transformed, harmonized and enriched within a staging and working environment. Third, data is imported into common analytics platforms and data models (including i2b2 and tranSMART) and made accessible in a form compliant with the interoperability requirements defined on the national level. Secure data access and sharing will be implemented with innovative combinations of privacy-enhancing technologies (safe data, safe settings, safe outputs) and methods of distributed computing. Use Cases: From the perspective of health care and medical research, our approach is disease-oriented and use-case driven, i.e. following the needs of physicians and researchers and aiming at measurable benefits for our patients. We will work on early diagnosis, tailored therapies and therapy decision tools with focuses on neurology, oncology and further disease entities. 
Our early use cases will serve as blueprints for the following ones, verifying that the infrastructure developed by DIFUTURE is able to support a variety of application scenarios. Discussion: Own previous work, the use of internationally successful open source systems and a state-of-the-art software architecture are cornerstones of our approach. In the conceptual phase of the initiative, we have already prototypically implemented and tested the most important components of our architecture. |
Gleim, Lars Christoph; Karim, Md. Rezaul; Zimmermann, Lukas; Kohlbacher, Oliver; Stenzhorn, Holger; Decker, Stefan; Beyan, Oya Schema Extraction for Privacy Preserving Processing of Sensitive Data MEPDaW-SeWeBMeDA-SWeTI 2018 Joint Proceedings of MEPDaW, SeWeBMeDA and SWeTI 2018, 2112 , CEUR Workshop Proceedings 2018. @conference{Extraction_2018, title = {Schema Extraction for Privacy Preserving Processing of Sensitive Data}, author = {Lars Christoph Gleim and Md. Rezaul Karim and Lukas Zimmermann and Oliver Kohlbacher and Holger Stenzhorn and Stefan Decker and Oya Beyan}, url = {http://ceur-ws.org/Vol-2112/sewebmeda_paper_1.pdf}, year = {2018}, date = {2018-01-01}, booktitle = {MEPDaW-SeWeBMeDA-SWeTI 2018 Joint Proceedings of MEPDaW, SeWeBMeDA and SWeTI 2018}, volume = {2112}, pages = {36-47}, series = {CEUR Workshop Proceedings}, abstract = {Lars Christoph Gleim, Md. Rezaul Karim, Lukas Zimmermann, Oliver Kohlbacher, Holger Stenzhorn, Stefan Decker, Oya Beyan. Sharing privacy sensitive data across organizational boundaries is commonly not a viable option due to the legal and ethical restrictions. Regulations such as the EU General Data Protection Rules impose strict requirements concerning the protection of personal data. Therefore new approaches are emerging to utilize data right in their original repositories without giving direct access to third parties, such as the Personal Health Train initiative [16]. Circumventing limitations of previous systems, this paper proposes an automated schema extraction approach compatible with existing Semantic Web-based technologies. The extracted schema enables ad-hoc query formulation against privacy sensitive data sources without requiring data access, and successive execution of that request in a secure enclave under the data provider’s control. The developed approach permits us to extract structural information from non-uniformed resources and merge it into a single schema to preserve the privacy of each data source. 
Initial experiments show that our approach overcomes the reliance of previous approaches on agreeing upon shared schema and encoding a priori in favor of more flexible schema extraction and introspection.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Lars Christoph Gleim, Md. Rezaul Karim, Lukas Zimmermann, Oliver Kohlbacher, Holger Stenzhorn, Stefan Decker, Oya Beyan. Sharing privacy sensitive data across organizational boundaries is commonly not a viable option due to the legal and ethical restrictions. Regulations such as the EU General Data Protection Rules impose strict requirements concerning the protection of personal data. Therefore new approaches are emerging to utilize data right in their original repositories without giving direct access to third parties, such as the Personal Health Train initiative [16]. Circumventing limitations of previous systems, this paper proposes an automated schema extraction approach compatible with existing Semantic Web-based technologies. The extracted schema enables ad-hoc query formulation against privacy sensitive data sources without requiring data access, and successive execution of that request in a secure enclave under the data provider’s control. The developed approach permits us to extract structural information from non-uniformed resources and merge it into a single schema to preserve the privacy of each data source. Initial experiments show that our approach overcomes the reliance of previous approaches on agreeing upon shared schema and encoding a priori in favor of more flexible schema extraction and introspection. |