Publications
Lara Schneider, Tim Kehl, Kristina Thedinga, Nadja Grammes, Christina Backes, Christopher Mohr, Benjamin Schubert, Kerstin Lenhof, Nico Gerstner, Andreas Daniel Hartkopf, Markus Wallwiener, Oliver Kohlbacher, Andreas Keller, Eckart Meese, Norbert Graf, Hans-Peter Lenhof ClinOmicsTrailbc: a visual analytics tool for breast cancer treatment stratification Bioinformatics, 35 (24), pp. 5171-5181, 2019. @article{SchneiderClinOmics2019, title = {ClinOmicsTrailbc: a visual analytics tool for breast cancer treatment stratification}, author = {Lara Schneider and Tim Kehl and Kristina Thedinga and Nadja L. Grammes and Christina Backes and Christopher Mohr and Benjamin Schubert and Kerstin Lenhof and Nico Gerstner and Andreas Daniel Hartkopf and Markus Wallwiener and Oliver Kohlbacher and Andreas Keller and Eckart Meese and Norbert Graf and Hans-Peter Lenhof}, doi = {10.1093/bioinformatics/btz302}, year = {2019}, date = {2019-12-15}, journal = {Bioinformatics}, volume = {35}, number = {24}, pages = {5171-5181}, abstract = {Motivation Breast cancer is the second leading cause of cancer death among women. Tumors, even of the same histopathological subtype, exhibit a high genotypic diversity that impedes therapy stratification and that hence must be accounted for in the treatment decision-making process. Results Here, we present ClinOmicsTrailbc, a comprehensive visual analytics tool for breast cancer decision support that provides a holistic assessment of standard-of-care targeted drugs, candidates for drug repositioning and immunotherapeutic approaches. To this end, our tool analyzes and visualizes clinical markers and (epi-)genomics and transcriptomics datasets to identify and evaluate the tumor’s main driver mutations, the tumor mutational burden, activity patterns of core cancer-relevant pathways, drug-specific biomarkers, the status of molecular drug targets and pharmacogenomic influences. 
In order to demonstrate ClinOmicsTrailbc’s rich functionality, we present three case studies highlighting various ways in which ClinOmicsTrailbc can support breast cancer precision medicine. ClinOmicsTrailbc is a powerful integrated visual analytics tool for breast cancer research in general and for therapy stratification in particular, assisting oncologists to find the best possible treatment options for their breast cancer patients based on actionable, evidence-based results. Availability and implementation ClinOmicsTrailbc can be freely accessed at https://clinomicstrail.bioinf.uni-sb.de. Supplementary information Supplementary data are available at Bioinformatics online.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Motivation Breast cancer is the second leading cause of cancer death among women. Tumors, even of the same histopathological subtype, exhibit a high genotypic diversity that impedes therapy stratification and that hence must be accounted for in the treatment decision-making process. Results Here, we present ClinOmicsTrailbc, a comprehensive visual analytics tool for breast cancer decision support that provides a holistic assessment of standard-of-care targeted drugs, candidates for drug repositioning and immunotherapeutic approaches. To this end, our tool analyzes and visualizes clinical markers and (epi-)genomics and transcriptomics datasets to identify and evaluate the tumor’s main driver mutations, the tumor mutational burden, activity patterns of core cancer-relevant pathways, drug-specific biomarkers, the status of molecular drug targets and pharmacogenomic influences. In order to demonstrate ClinOmicsTrailbc’s rich functionality, we present three case studies highlighting various ways in which ClinOmicsTrailbc can support breast cancer precision medicine. 
ClinOmicsTrailbc is a powerful integrated visual analytics tool for breast cancer research in general and for therapy stratification in particular, assisting oncologists to find the best possible treatment options for their breast cancer patients based on actionable, evidence-based results. Availability and implementation ClinOmicsTrailbc can be freely accessed at https://clinomicstrail.bioinf.uni-sb.de. Supplementary information Supplementary data are available at Bioinformatics online. |
L, Schneider; T, Kehl; K, Thedinga; NL, Grammes; C, Backes; C, Mohr; B, Schubert; K, Lenhof; N, Gerstner; AD, Hartkopf; M, Wallwiener; O, Kohlbacher; A, Keller; E, Meese; N, Graf; HP, Lenhof ClinOmicsTrailbc: a visual analytics tool for breast cancer treatment stratification. Bioinformatics, 35 (24), pp. 5171-5181, 2019. @article{Schneider_2019, title = {ClinOmicsTrailbc: a visual analytics tool for breast cancer treatment stratification.}, author = {Schneider L and Kehl T and Thedinga K and Grammes NL and Backes C and Mohr C and Schubert B and Lenhof K and Gerstner N and Hartkopf AD and Wallwiener M and Kohlbacher O and Keller A and Meese E and Graf N and Lenhof HP}, url = {https://academic.oup.com/bioinformatics/article/35/24/5171/5481954}, doi = {10.1093/bioinformatics/btz302}, year = {2019}, date = {2019-12-15}, journal = {Bioinformatics}, volume = {35}, number = {24}, pages = {5171-5181}, abstract = { Motivation: Breast cancer is the second leading cause of cancer death among women. Tumors, even of the same histopathological subtype, exhibit a high genotypic diversity that impedes therapy stratification and that hence must be accounted for in the treatment decision-making process. Results: Here, we present ClinOmicsTrailbc, a comprehensive visual analytics tool for breast cancer decision support that provides a holistic assessment of standard-of-care targeted drugs, candidates for drug repositioning and immunotherapeutic approaches. To this end, our tool analyzes and visualizes clinical markers and (epi-)genomics and transcriptomics datasets to identify and evaluate the tumor's main driver mutations, the tumor mutational burden, activity patterns of core cancer-relevant pathways, drug-specific biomarkers, the status of molecular drug targets and pharmacogenomic influences. 
In order to demonstrate ClinOmicsTrailbc's rich functionality, we present three case studies highlighting various ways in which ClinOmicsTrailbc can support breast cancer precision medicine. ClinOmicsTrailbc is a powerful integrated visual analytics tool for breast cancer research in general and for therapy stratification in particular, assisting oncologists to find the best possible treatment options for their breast cancer patients based on actionable, evidence-based results. Availability and implementation: ClinOmicsTrailbc can be freely accessed at https://clinomicstrail.bioinf.uni-sb.de. Supplementary information: Supplementary data are available at Bioinformatics online. }, keywords = {}, pubstate = {published}, tppubtype = {article} } Motivation: Breast cancer is the second leading cause of cancer death among women. Tumors, even of the same histopathological subtype, exhibit a high genotypic diversity that impedes therapy stratification and that hence must be accounted for in the treatment decision-making process. Results: Here, we present ClinOmicsTrailbc, a comprehensive visual analytics tool for breast cancer decision support that provides a holistic assessment of standard-of-care targeted drugs, candidates for drug repositioning and immunotherapeutic approaches. To this end, our tool analyzes and visualizes clinical markers and (epi-)genomics and transcriptomics datasets to identify and evaluate the tumor's main driver mutations, the tumor mutational burden, activity patterns of core cancer-relevant pathways, drug-specific biomarkers, the status of molecular drug targets and pharmacogenomic influences. In order to demonstrate ClinOmicsTrailbc's rich functionality, we present three case studies highlighting various ways in which ClinOmicsTrailbc can support breast cancer precision medicine. 
ClinOmicsTrailbc is a powerful integrated visual analytics tool for breast cancer research in general and for therapy stratification in particular, assisting oncologists to find the best possible treatment options for their breast cancer patients based on actionable, evidence-based results. Availability and implementation: ClinOmicsTrailbc can be freely accessed at https://clinomicstrail.bioinf.uni-sb.de. Supplementary information: Supplementary data are available at Bioinformatics online. |
Lederer, S.; Dijkstra, T.M.H.; Heskes, T. Additive Dose Response Models: Defining Synergy Frontiers in Pharmacology, 10 (1384), 2019. @article{Lederer_2019, title = {Additive Dose Response Models: Defining Synergy}, author = {Lederer, S. and Dijkstra, T. M. H. and Heskes, T.}, url = {https://www.frontiersin.org/articles/10.3389/fphar.2019.01384/full}, doi = {10.3389/fphar.2019.01384}, year = {2019}, date = {2019-11-26}, journal = {Frontiers in Pharmacology}, volume = {10}, number = {1384}, abstract = {In synergy studies, one focuses on compound combinations that promise a synergistic or antagonistic effect. With the help of high-throughput techniques, a huge amount of compound combinations can be screened and filtered for suitable candidates for a more detailed analysis. Those promising candidates are chosen based on the deviance between a measured response and an expected non-interactive response. A non-interactive response is based on a principle of no interaction, such as Loewe Additivity or Bliss Independence. In a previous study, we introduced an explicit formulation of the hitherto implicitly defined Loewe Additivity, the so-called Explicit Mean Equation. In the current study we show that this Explicit Mean Equation outperforms the original implicit formulation of Loewe Additivity and Bliss Independence when measuring synergy in terms of the deviance between measured and expected response, called the lack-of-fit. Further, we show that computing synergy as lack-of-fit outperforms a parametric approach. We show this on two datasets of compound combinations that are categorized into synergistic, non-interactive, and antagonistic.}, keywords = {}, pubstate = {published}, tppubtype = {article} } In synergy studies, one focuses on compound combinations that promise a synergistic or antagonistic effect. 
With the help of high-throughput techniques, a huge amount of compound combinations can be screened and filtered for suitable candidates for a more detailed analysis. Those promising candidates are chosen based on the deviance between a measured response and an expected non-interactive response. A non-interactive response is based on a principle of no interaction, such as Loewe Additivity or Bliss Independence. In a previous study, we introduced an explicit formulation of the hitherto implicitly defined Loewe Additivity, the so-called Explicit Mean Equation. In the current study we show that this Explicit Mean Equation outperforms the original implicit formulation of Loewe Additivity and Bliss Independence when measuring synergy in terms of the deviance between measured and expected response, called the lack-of-fit. Further, we show that computing synergy as lack-of-fit outperforms a parametric approach. We show this on two datasets of compound combinations that are categorized into synergistic, non-interactive, and antagonistic. |
Richter, Bernadette; Rurik, Marc; Gurk, Stephanie; Kohlbacher, Oliver; Fischer, Markus Food monitoring: Screening of the geographical origin of white asparagus using FT-NIR and machine learning Food Control, 104 , pp. 318-325, 2019. @article{Richter_2019, title = {Food monitoring: Screening of the geographical origin of white asparagus using FT-NIR and machine learning}, author = {Bernadette Richter and Marc Rurik and Stephanie Gurk and Oliver Kohlbacher and Markus Fischer }, url = {https://doi.org/10.1016/j.foodcont.2019.04.032}, doi = {10.1016/j.foodcont.2019.04.032}, year = {2019}, date = {2019-10-01}, journal = {Food Control}, volume = {104}, pages = {318-325}, abstract = {The aim of this study was to experimentally monitor the geographical origin of white asparagus based on near-infrared spectroscopy (NIR). 275 asparagus samples from six countries of origin and three years of harvest were analyzed. Support vector machine (SVM) classifiers were trained to predict the geographical origin and validated using nested cross-validation. When coupled with feature selection, a linear SVM was able to predict the country of origin with an accuracy of 89%. Confidence estimation based on posterior class probabilities can be used to exclude unreliable classifications leading to an accuracy up to 97%. These results demonstrate the potential of NIR spectroscopy combined with machine learning methods as a screening technique for provenance distinction of asparagus.}, keywords = {}, pubstate = {published}, tppubtype = {article} } The aim of this study was to experimentally monitor the geographical origin of white asparagus based on near-infrared spectroscopy (NIR). 275 asparagus samples from six countries of origin and three years of harvest were analyzed. Support vector machine (SVM) classifiers were trained to predict the geographical origin and validated using nested cross-validation. 
When coupled with feature selection, a linear SVM was able to predict the country of origin with an accuracy of 89%. Confidence estimation based on posterior class probabilities can be used to exclude unreliable classifications leading to an accuracy up to 97%. These results demonstrate the potential of NIR spectroscopy combined with machine learning methods as a screening technique for provenance distinction of asparagus. |
B, Richter; M, Rurik; S, Gurk; O, Kohlbacher; M, Fischer Food monitoring: Screening of the geographical origin of white asparagus using FT-NIR and machine learning Food Control, 104 , pp. 318-325, 2019. @article{Richter_2019b, title = {Food monitoring: Screening of the geographical origin of white asparagus using FT-NIR and machine learning}, author = {Richter B and Rurik M and Gurk S and Kohlbacher O and Fischer M}, url = {https://doi.org/10.1016/j.foodcont.2019.04.032}, doi = {10.1016/j.foodcont.2019.04.032}, year = {2019}, date = {2019-10-01}, journal = {Food Control}, volume = {104}, pages = {318-325}, abstract = {The aim of this study was to experimentally monitor the geographical origin of white asparagus based on near-infrared spectroscopy (NIR). 275 asparagus samples from six countries of origin and three years of harvest were analyzed. Support vector machine (SVM) classifiers were trained to predict the geographical origin and validated using nested cross-validation. When coupled with feature selection, a linear SVM was able to predict the country of origin with an accuracy of 89%. Confidence estimation based on posterior class probabilities can be used to exclude unreliable classifications leading to an accuracy up to 97%. These results demonstrate the potential of NIR spectroscopy combined with machine learning methods as a screening technique for provenance distinction of asparagus.}, keywords = {}, pubstate = {published}, tppubtype = {article} } The aim of this study was to experimentally monitor the geographical origin of white asparagus based on near-infrared spectroscopy (NIR). 275 asparagus samples from six countries of origin and three years of harvest were analyzed. Support vector machine (SVM) classifiers were trained to predict the geographical origin and validated using nested cross-validation. When coupled with feature selection, a linear SVM was able to predict the country of origin with an accuracy of 89%. 
Confidence estimation based on posterior class probabilities can be used to exclude unreliable classifications leading to an accuracy up to 97%. These results demonstrate the potential of NIR spectroscopy combined with machine learning methods as a screening technique for provenance distinction of asparagus. |
Choobdar, Sarvenaz; Ahsen, Mehmet E; Crawford, Jake; Tomasoni, Mattia; Fang, Tao; Lamparter, David; Lin, Junyuan; Hescott, Benjamin; Hu, Xiaozhe; Mercer, Johnathan; Natoli, Ted; Narayan, Rajiv; Subramanian, Aravind; Zhang, Jitao D; Stolovitzky, Gustavo; Kutalik, Zoltán; Lage, Kasper; Slonim, Donna K; Saez-Rodriguez, Julio; Cowen, Lenore J; Bergmann, Sven; Marbach, Daniel Assessment of network module identification across complex diseases Nat. Methods, 16 (9), pp. 843–852, 2019. @article{Choobdar265553b, title = {Assessment of network module identification across complex diseases}, author = {Sarvenaz Choobdar and Mehmet E Ahsen and Jake Crawford and Mattia Tomasoni and Tao Fang and David Lamparter and Junyuan Lin and Benjamin Hescott and Xiaozhe Hu and Johnathan Mercer and Ted Natoli and Rajiv Narayan and Aravind Subramanian and Jitao D Zhang and Gustavo Stolovitzky and Zoltán Kutalik and Kasper Lage and Donna K Slonim and Julio Saez-Rodriguez and Lenore J Cowen and Sven Bergmann and Daniel Marbach}, url = {https://www.nature.com/articles/s41592-019-0509-5.pdf}, doi = {10.1038/s41592-019-0509-5}, year = {2019}, date = {2019-09-01}, journal = {Nat. Methods}, volume = {16}, number = {9}, pages = {843–852}, publisher = {Cold Spring Harbor Laboratory}, abstract = {Identification of modules in molecular networks is at the core of many current analysis methods in biomedical research. However, how well different approaches identify disease-relevant modules in different types of gene and protein networks remains poorly understood. We launched the “Disease Module Identification DREAM Challenge”, an open competition to comprehensively assess module identification methods across diverse protein-protein interaction, signaling, gene co-expression, homology, and cancer-gene networks. Predicted network modules were tested for association with complex traits and diseases using a unique collection of 180 genome-wide association studies (GWAS). 
Our critical assessment of 75 contributed module identification methods reveals novel top-performing algorithms, which recover complementary trait-associated modules. We find that most of these modules correspond to core disease-relevant pathways, which often comprise therapeutic targets and correctly prioritize candidate disease genes. This community challenge establishes benchmarks, tools and guidelines for molecular network analysis to study human disease biology (https://synapse.org/modulechallenge).}, keywords = {}, pubstate = {published}, tppubtype = {article} } Identification of modules in molecular networks is at the core of many current analysis methods in biomedical research. However, how well different approaches identify disease-relevant modules in different types of gene and protein networks remains poorly understood. We launched the “Disease Module Identification DREAM Challenge”, an open competition to comprehensively assess module identification methods across diverse protein-protein interaction, signaling, gene co-expression, homology, and cancer-gene networks. Predicted network modules were tested for association with complex traits and diseases using a unique collection of 180 genome-wide association studies (GWAS). Our critical assessment of 75 contributed module identification methods reveals novel top-performing algorithms, which recover complementary trait-associated modules. We find that most of these modules correspond to core disease-relevant pathways, which often comprise therapeutic targets and correctly prioritize candidate disease genes. This community challenge establishes benchmarks, tools and guidelines for molecular network analysis to study human disease biology (https://synapse.org/modulechallenge). |
S, Choobdar; ME, Ahsen; J, Crawford; M, Tomasoni; T, Fang; D, Lamparter; J, Lin; B, Hescott; X, Hu; J, Mercer; T, Natoli; R, Narayan; DREAM Module Identification Challenge Consortium; A, Subramanian; JD, Zhang; G, Stolovitzky; Z, Kutalik; K, Lage; DK, Slonim; J, Saez-Rodriguez; LJ, Cowen; S, Bergmann; D, Marbach Assessment of network module identification across complex diseases Nature Methods, 16 (9), pp. 843-852, 2019. @article{choobdar_2019, title = {Assessment of network module identification across complex diseases}, author = {Choobdar S and Ahsen ME and Crawford J and Tomasoni M and Fang T and Lamparter D and Lin J and Hescott B and Hu X and Mercer J and Natoli T and Narayan R and {DREAM Module Identification Challenge Consortium} and Subramanian A and Zhang JD and Stolovitzky G and Kutalik Z and Lage K and Slonim DK and Saez-Rodriguez J and Cowen LJ and Bergmann S and Marbach D}, url = {https://doi.org/10.1038/s41592-019-0509-5}, doi = {10.1038/s41592-019-0509-5}, year = {2019}, date = {2019-08-30}, journal = {Nature Methods}, volume = {16}, number = {9}, pages = {843-852}, abstract = {Many bioinformatics methods have been proposed for reducing the complexity of large gene or protein networks into relevant subnetworks or modules. Yet, how such methods compare to each other in terms of their ability to identify disease-relevant modules in different types of network remains poorly understood. We launched the ‘Disease Module Identification DREAM Challenge’, an open competition to comprehensively assess module identification methods across diverse protein–protein interaction, signaling, gene co-expression, homology and cancer-gene networks. Predicted network modules were tested for association with complex traits and diseases using a unique collection of 180 genome-wide association studies. Our robust assessment of 75 module identification methods reveals top-performing algorithms, which recover complementary trait-associated modules. 
We find that most of these modules correspond to core disease-relevant pathways, which often comprise therapeutic targets. This community challenge establishes biologically interpretable benchmarks, tools and guidelines for molecular network analysis to study human disease biology.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Many bioinformatics methods have been proposed for reducing the complexity of large gene or protein networks into relevant subnetworks or modules. Yet, how such methods compare to each other in terms of their ability to identify disease-relevant modules in different types of network remains poorly understood. We launched the ‘Disease Module Identification DREAM Challenge’, an open competition to comprehensively assess module identification methods across diverse protein–protein interaction, signaling, gene co-expression, homology and cancer-gene networks. Predicted network modules were tested for association with complex traits and diseases using a unique collection of 180 genome-wide association studies. Our robust assessment of 75 module identification methods reveals top-performing algorithms, which recover complementary trait-associated modules. We find that most of these modules correspond to core disease-relevant pathways, which often comprise therapeutic targets. This community challenge establishes biologically interpretable benchmarks, tools and guidelines for molecular network analysis to study human disease biology. |
S, Fillinger; L, de la Garza; A, Peltzer; O, Kohlbacher; S, Nahnsen Challenges of big data integration in the life sciences Analytical and Bioanalytical Chemistry, 411 (26), pp. 6791–6800, 2019. @article{Fillinger.2019, title = {Challenges of big data integration in the life sciences}, author = {Fillinger S and de la Garza, L and Peltzer A and Kohlbacher O and Nahnsen S}, url = {https://link.springer.com/article/10.1007%2Fs00216-019-02074-9}, doi = {10.1007/s00216-019-02074-9}, year = {2019}, date = {2019-08-28}, journal = {Analytical and Bioanalytical Chemistry}, volume = {411}, number = {26}, pages = {6791–6800}, abstract = {Big data has been reported to be revolutionizing many areas of life, including science. It summarizes data that is unprecedentedly large, rapidly generated, heterogeneous, and hard to accurately interpret. This availability has also brought new challenges: How to properly annotate data to make it searchable? What are the legal and ethical hurdles when sharing data? How to store data securely, preventing loss and corruption? The life sciences are not the only disciplines that must align themselves with big data requirements to keep up with the latest developments. The large hadron collider, for instance, generates research data at a pace beyond any current biomedical research center. There are three recent major coinciding events that explain the emergence of big data in the context of research: the technological revolution for data generation, the development of tools for data analysis, and a conceptual change towards open science and data. The true potential of big data lies in pattern discovery in large datasets, as well as the formulation of new models and hypotheses. Confirmation of the existence of the Higgs boson, for instance, is one of the most recent triumphs of big data analysis in physics. Digital representations of biological systems have become more comprehensive. 
This, in combination with advances in machine learning, creates exciting new research possibilities. In this paper, we review the state of big data in bioanalytical research and provide an overview of the guidelines for its proper usage.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Big data has been reported to be revolutionizing many areas of life, including science. It summarizes data that is unprecedentedly large, rapidly generated, heterogeneous, and hard to accurately interpret. This availability has also brought new challenges: How to properly annotate data to make it searchable? What are the legal and ethical hurdles when sharing data? How to store data securely, preventing loss and corruption? The life sciences are not the only disciplines that must align themselves with big data requirements to keep up with the latest developments. The large hadron collider, for instance, generates research data at a pace beyond any current biomedical research center. There are three recent major coinciding events that explain the emergence of big data in the context of research: the technological revolution for data generation, the development of tools for data analysis, and a conceptual change towards open science and data. The true potential of big data lies in pattern discovery in large datasets, as well as the formulation of new models and hypotheses. Confirmation of the existence of the Higgs boson, for instance, is one of the most recent triumphs of big data analysis in physics. Digital representations of biological systems have become more comprehensive. This, in combination with advances in machine learning, creates exciting new research possibilities. In this paper, we review the state of big data in bioanalytical research and provide an overview of the guidelines for its proper usage. |
van den Brand, J.A.J.G.; Dijkstra, T.M.H.; Wetzels, J.; Stengel, B.; Metzger, M.; Blankestijn, P.J.; Lambers Heerspink, H.J.; Gansevoort, R.T. Predicting kidney failure from longitudinal kidney function trajectory: a comparison of models PLOS One, 14 (5), 2019. @article{van_den_brand_2019, title = {Predicting kidney failure from longitudinal kidney function trajectory: a comparison of models}, author = {van den Brand, J. A. J. G. and Dijkstra, T. M. H. and Wetzels, J. and Stengel, B. and Metzger, M. and Blankestijn, P. J. and Lambers Heerspink, H. J. and Gansevoort, R. T.}, url = {https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0216559}, doi = {10.1371/journal.pone.0216559}, year = {2019}, date = {2019-05-09}, journal = {PLOS One}, volume = {14}, number = {5}, abstract = {Rationale \& objective Early prediction of chronic kidney disease (CKD) progression to end-stage kidney disease (ESKD) currently use Cox models including baseline estimated glomerular filtration rate (eGFR) only. Alternative approaches include a Cox model that includes eGFR slope determined over a baseline period of time, a Cox model with time varying GFR, or a joint modeling approach. We studied if these more complex approaches may further improve ESKD prediction. Study design Prospective cohort. Setting \& participants We re-used data from two CKD cohorts including patients with baseline eGFR >30ml/min per 1.73m2. MASTERPLAN (N = 505; 55 ESKD events) was used as development dataset, and NephroTest (N = 1385; 72 events) for validation. Predictors All models included age, sex, eGFR, and albuminuria, known prognostic markers for ESKD. Analytical approach We trained the models on the MASTERPLAN data and determined discrimination and calibration for each model at 2 years follow-up for a prediction horizon of 2 years in the NephroTest cohort. We benchmarked the predictive performance against the Kidney Failure Risk Equation (KFRE). 
Results The C-statistics for the KFRE was 0.94 (95%CI 0.86 to 1.01). Performance was similar for the Cox model with time-varying eGFR (0.92 [0.84 to 0.97]), eGFR (0.95 [0.90 to 1.00]), and the joint model 0.91 [0.87 to 0.96]). The Cox model with eGFR slope showed the best calibration. Conclusion In the present studies, where the outcome was rare and follow-up data was highly complete, the joint models did not offer improvement in predictive performance over more traditional approaches such as a survival model with time-varying eGFR, or a model with eGFR slope. }, keywords = {}, pubstate = {published}, tppubtype = {article} } Rationale & objective Early prediction of chronic kidney disease (CKD) progression to end-stage kidney disease (ESKD) currently use Cox models including baseline estimated glomerular filtration rate (eGFR) only. Alternative approaches include a Cox model that includes eGFR slope determined over a baseline period of time, a Cox model with time varying GFR, or a joint modeling approach. We studied if these more complex approaches may further improve ESKD prediction. Study design Prospective cohort. Setting & participants We re-used data from two CKD cohorts including patients with baseline eGFR >30ml/min per 1.73m2. MASTERPLAN (N = 505; 55 ESKD events) was used as development dataset, and NephroTest (N = 1385; 72 events) for validation. Predictors All models included age, sex, eGFR, and albuminuria, known prognostic markers for ESKD. Analytical approach We trained the models on the MASTERPLAN data and determined discrimination and calibration for each model at 2 years follow-up for a prediction horizon of 2 years in the NephroTest cohort. We benchmarked the predictive performance against the Kidney Failure Risk Equation (KFRE). Results The C-statistics for the KFRE was 0.94 (95%CI 0.86 to 1.01). 
Performance was similar for the Cox model with time-varying eGFR (0.92 [0.84 to 0.97]), eGFR (0.95 [0.90 to 1.00]), and the joint model 0.91 [0.87 to 0.96]). The Cox model with eGFR slope showed the best calibration. Conclusion In the present studies, where the outcome was rare and follow-up data was highly complete, the joint models did not offer improvement in predictive performance over more traditional approaches such as a survival model with time-varying eGFR, or a model with eGFR slope. |
Bilich, Tatjana; Nelde, Annika; Bichmann, Leon; Roerden, Malte; Salih, Helmut R; Kowalewski, Daniel J; Schuster, Heiko; Tsou, Chih-Chiang; Marcu, Ana; Neidert, Marian C; Lübke, Maren; Rieth, Jonas; Schemionek, Mirle; Brümmendorf, Tim H; Vucinic, Vladan; Niederwieser, Dietger; Bauer, Jens; Märklin, Melanie; Peper, Janet K; Klein, Reinhild; Kanz, Lothar; Rammensee, Hans-Georg; Stevanovic, Stefan; Walz, Juliane S The HLA ligandome landscape of chronic myeloid leukemia delineates novel T-cell epitopes for immunotherapy Blood, 133 (6), pp. 550-565, 2019. @article{HLACML2019, title = {The HLA ligandome landscape of chronic myeloid leukemia delineates novel T-cell epitopes for immunotherapy}, author = {Tatjana Bilich and Annika Nelde and Leon Bichmann and Malte Roerden and Helmut R Salih and Daniel J Kowalewski and Heiko Schuster and Chih-Chiang Tsou and Ana Marcu and Marian C Neidert and Maren Lübke and Jonas Rieth and Mirle Schemionek and Tim H Brümmendorf and Vladan Vucinic and Dietger Niederwieser and Jens Bauer and Melanie Märklin and Janet K Peper and Reinhild Klein and Lothar Kanz and Hans-Georg Rammensee and Stefan Stevanovic and Juliane S Walz}, url = {http://www.bloodjournal.org/content/133/6/550?sso-checked=true}, doi = {10.1182/blood-2018-07-866830}, year = {2019}, date = {2019-02-07}, journal = {Blood}, volume = {133}, number = {6}, pages = {550-565}, abstract = {Anti-leukemia immunity plays an important role in disease control and maintenance of tyrosine kinase inhibitor (TKI)-free remission in chronic myeloid leukemia (CML). Thus, antigen-specific immunotherapy holds promise to strengthen immune control in CML, but requires the identification of CML-associated targets. In this study, we used a mass spectrometry-based approach to identify naturally presented, HLA class I- and class II-restricted peptides in primary CML samples. 
Comparative HLA ligandome profiling using a comprehensive dataset of different hematological benign specimen and samples of CML patients in deep molecular remission delineated a panel of novel, frequently presented, CML-exclusive peptides. These non-mutated target antigens are of particular relevance since our extensive data mining approach suggests absence of naturally presented, BCR-ABL- and ABL-BCR-derived, HLA-restricted peptides and lack of frequent, tumor-exclusive presentation of known cancer/testis and leukemia-associated antigens. Functional characterization revealed spontaneous T-cell responses against the newly identified CML-associated peptides in CML patient samples and their ability to induce multifunctional and cytotoxic antigen-specific T cells de novo in samples of healthy volunteers and CML patients. These antigens are thus prime candidates for T cell-based immunotherapeutic approaches that may prolong TKI-free survival and even mediate cure of CML patients.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Anti-leukemia immunity plays an important role in disease control and maintenance of tyrosine kinase inhibitor (TKI)-free remission in chronic myeloid leukemia (CML). Thus, antigen-specific immunotherapy holds promise to strengthen immune control in CML, but requires the identification of CML-associated targets. In this study, we used a mass spectrometry-based approach to identify naturally presented, HLA class I- and class II-restricted peptides in primary CML samples. Comparative HLA ligandome profiling using a comprehensive dataset of different hematological benign specimen and samples of CML patients in deep molecular remission delineated a panel of novel, frequently presented, CML-exclusive peptides. 
These non-mutated target antigens are of particular relevance since our extensive data mining approach suggests absence of naturally presented, BCR-ABL- and ABL-BCR-derived, HLA-restricted peptides and lack of frequent, tumor-exclusive presentation of known cancer/testis and leukemia-associated antigens. Functional characterization revealed spontaneous T-cell responses against the newly identified CML-associated peptides in CML patient samples and their ability to induce multifunctional and cytotoxic antigen-specific T cells de novo in samples of healthy volunteers and CML patients. These antigens are thus prime candidates for T cell-based immunotherapeutic approaches that may prolong TKI-free survival and even mediate cure of CML patients. |
Löffler, Markus W; Mohr, Christopher; Bichmann, Leon; Freudenmann, Lena Katharina; Walzer, Mathias; Schroeder, Christopher W; Trautwein, Nico; Hilke, Franz J; Zinser, Raphael S; Mühlenbruch, Lena; Kowalewski, Daniel J; Schuster, Heiko; Sturm, Marc; Matthes, Jakob; Riess, Olaf; Czemmel, Stefan; Nahnsen, Sven; Königsrainer, Ingmar; Thiel, Karolin; Nadalin, Silvio; Beckert, Stefan; Bösmüller, Hans; Fend, Falko; Velic, Ana; Macek, Boris; Haen, Sebastian P; Buonaguro, Luigi; Kohlbacher, Oliver; Königsrainer, Alfred; Rammensee, Hans-Georg; Consortium, The HepaVac Multi-omics discovery of exome-derived neoantigens in hepatocellular carcinoma Genome Med., Forthcoming. @article{LoefflerGenomeMed2019, title = {Multi-omics discovery of exome-derived neoantigens in hepatocellular carcinoma}, author = {Markus W Löffler and Christopher Mohr and Leon Bichmann and Lena Katharina Freudenmann and Mathias Walzer and Christopher W Schroeder and Nico Trautwein and Franz J Hilke and Raphael S Zinser and Lena Mühlenbruch and Daniel J Kowalewski and Heiko Schuster and Marc Sturm and Jakob Matthes and Olaf Riess and Stefan Czemmel and Sven Nahnsen and Ingmar Königsrainer and Karolin Thiel and Silvio Nadalin and Stefan Beckert and Hans Bösmüller and Falko Fend and Ana Velic and Boris Macek and Sebastian P Haen and Luigi Buonaguro and Oliver Kohlbacher and Alfred Königsrainer and Hans-Georg Rammensee and The HepaVac Consortium}, year = {2019}, date = {2019-02-04}, journal = {Genome Med.}, abstract = {Background: Although mutated HLA ligands are considered ideal cancer-specific immunotherapy targets, evidence for their presentation is lacking in hepatocellular carcinomas (HCC). Employing a unique multi-omics approach comprising a neoepitope identification pipeline, we assessed exome-derived mutations naturally presented as HLA class I ligands in HCC. 
Methods: In-depth multi-omics analyses included whole exome and transcriptome sequencing to define individual patient-specific search spaces of neoepitope candidates. Evidence for the natural presentation of mutated HLA ligands was investigated through an in silico pipeline integrating proteome and HLA ligandome profiling data. Results: The approach was successfully validated in a state-of-the-art dataset from malignant melanoma and despite multi-omics evidence for mutations, mutated naturally presented HLA ligands remained elusive in HCC. An analysis of extensive cancer datasets confirmed fundamental differences in tumor mutational burden in HCC and melanoma, challenging the notion that exome-derived mutations contribute relevantly to the expectable neoepitope pool in malignancies with only few mutations. Conclusions: This study suggests that exome-derived mutated HLA ligands appear to be rarely presented in HCC, inter alia resulting from a low mutational burden as compared to other malignancies such as melanoma. Our results therefore demand widening the target scope for personalized immunotherapy beyond this limited range of mutated neoepitopes, particularly for malignancies with similar or lower mutational burden.}, keywords = {}, pubstate = {forthcoming}, tppubtype = {article} } Background: Although mutated HLA ligands are considered ideal cancer-specific immunotherapy targets, evidence for their presentation is lacking in hepatocellular carcinomas (HCC). Employing a unique multi-omics approach comprising a neoepitope identification pipeline, we assessed exome-derived mutations naturally presented as HLA class I ligands in HCC. Methods: In-depth multi-omics analyses included whole exome and transcriptome sequencing to define individual patient-specific search spaces of neoepitope candidates. Evidence for the natural presentation of mutated HLA ligands was investigated through an in silico pipeline integrating proteome and HLA ligandome profiling data. 
Results: The approach was successfully validated in a state-of-the-art dataset from malignant melanoma and despite multi-omics evidence for mutations, mutated naturally presented HLA ligands remained elusive in HCC. An analysis of extensive cancer datasets confirmed fundamental differences in tumor mutational burden in HCC and melanoma, challenging the notion that exome-derived mutations contribute relevantly to the expectable neoepitope pool in malignancies with only few mutations. Conclusions: This study suggests that exome-derived mutated HLA ligands appear to be rarely presented in HCC, inter alia resulting from a low mutational burden as compared to other malignancies such as melanoma. Our results therefore demand widening the target scope for personalized immunotherapy beyond this limited range of mutated neoepitopes, particularly for malignancies with similar or lower mutational burden. |
Ali, Muhammed; Foldvari, Zsofia; Giannakopoulou, Eirini; Böschen, Maxi-Lu; Strønen, Erlend; Yang, Weiwen; Toebes, Mireille; Schubert, Benjamin; Kohlbacher, Oliver; Schumacher, Ton N; Olweus, Johanna Induction Of Neoantigen Reactive T Cells From Healthy Donors Nat. Protocols, Forthcoming. @article{NeoantigensNatProt2019, title = {Induction Of Neoantigen Reactive T Cells From Healthy Donors}, author = {Muhammed Ali and Zsofia Foldvari and Eirini Giannakopoulou and Maxi-Lu Böschen and Erlend Strønen and Weiwen Yang and Mireille Toebes and Benjamin Schubert and Oliver Kohlbacher and Ton N Schumacher and Johanna Olweus}, url = {https://www.nature.com/articles/s41596-019-0170-6}, doi = {10.1038/s41596-019-0170-6}, year = {2019}, date = {2019-02-01}, journal = {Nat. Protocols}, abstract = {Identification of immunogenic neoantigens and their cognate T cells represent the most crucial and rate-limiting steps in the development of personalized cancer immunotherapies that are based on vaccination or on infusion of T cell receptor-engineered T cells. Recent advances in deep sequencing technologies and in silico prediction algorithms enable rapid identification of candidate neoepitopes. However, large scale validation of putative neoepitopes and isolation of reactive T cells is challenging due to limited availablity of patient material and low frequencies of neoepitope-specific T cells. Here, we describe a standardized protocol for induction of neoepitope-reactive T cells from healthy donor T cell repertoires, unaffected by the potentially immunosuppressive environment of the tumor-bearing host. Monocyte-derived dendritic cells transfected with mRNA encoding candidate neoepitopes are utilized to prime autologous naïve CD8+ T cells. Antigen-specific T cells recognizing endogenously processed and presented epitopes are detected using peptide-MHC (pMHC) multimers. 
Single multimer-positive T cells are sorted for identification of TCR sequences, preceded by an optional step that includes clonal expansion and functional characterization. The time required to identify neoepitope-specific T cells is 15 days, with an additional two to four weeks required for clonal expansion and downstream functional characterization. Identified neoepitopes and corresponding TCRs provide candidates for use in vaccination and TCR-based cancer immunotherapies, and data sets generated by this technology should be of value to improve algorithms to predict immunogenic neoantigens.}, keywords = {}, pubstate = {forthcoming}, tppubtype = {article} } Identification of immunogenic neoantigens and their cognate T cells represent the most crucial and rate-limiting steps in the development of personalized cancer immunotherapies that are based on vaccination or on infusion of T cell receptor-engineered T cells. Recent advances in deep sequencing technologies and in silico prediction algorithms enable rapid identification of candidate neoepitopes. However, large scale validation of putative neoepitopes and isolation of reactive T cells is challenging due to limited availablity of patient material and low frequencies of neoepitope-specific T cells. Here, we describe a standardized protocol for induction of neoepitope-reactive T cells from healthy donor T cell repertoires, unaffected by the potentially immunosuppressive environment of the tumor-bearing host. Monocyte-derived dendritic cells transfected with mRNA encoding candidate neoepitopes are utilized to prime autologous naïve CD8+ T cells. Antigen-specific T cells recognizing endogenously processed and presented epitopes are detected using peptide-MHC (pMHC) multimers. Single multimer-positive T cells are sorted for identification of TCR sequences, preceded by an optional step that includes clonal expansion and functional characterization. 
The time required to identify neoepitope-specific T cells is 15 days, with an additional two to four weeks required for clonal expansion and downstream functional characterization. Identified neoepitopes and corresponding TCRs provide candidates for use in vaccination and TCR-based cancer immunotherapies, and data sets generated by this technology should be of value to improve algorithms to predict immunogenic neoantigens. |
Alka, Oliver; Sachsenberg, Timo; Bichmann, Leon; Pfeuffer, Julianus; Weisser, Hendrik; Wein, Samuel; Netz, Eugen; Rurik, Marc; Kohlbacher, Oliver; Rost, Hannes OpenMS for open source analysis of mass spectrometric data PeerJ Preprints, 7 , pp. e27766v1, 2019, ISSN: 2167-9843. @article{10.7287/peerj.preprints.27766v1, title = {OpenMS for open source analysis of mass spectrometric data}, author = {Oliver Alka and Timo Sachsenberg and Leon Bichmann and Julianus Pfeuffer and Hendrik Weisser and Samuel Wein and Eugen Netz and Marc Rurik and Oliver Kohlbacher and Hannes Rost}, url = {https://doi.org/10.7287/peerj.preprints.27766v1}, doi = {10.7287/peerj.preprints.27766v1}, issn = {2167-9843}, year = {2019}, date = {2019-01-01}, journal = {PeerJ Preprints}, volume = {7}, pages = {e27766v1}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
Cain, Nicolas; Alka, Oliver; Segelke, Torben; von Wuthenau, Kristian; Kohlbacher, Oliver; Fischer, Markus Food fingerprinting: Mass spectrometric determination of the cocoa shell content (Theobroma cacao L.) in cocoa products by HPLC-QTOF-MS Food Chemistry, 298 , pp. 125013, 2019, ISSN: 0308-8146. @article{CAIN2019125013, title = {Food fingerprinting: Mass spectrometric determination of the cocoa shell content (Theobroma cacao L.) in cocoa products by HPLC-QTOF-MS}, author = {Nicolas Cain and Oliver Alka and Torben Segelke and Kristian von Wuthenau and Oliver Kohlbacher and Markus Fischer}, url = {http://www.sciencedirect.com/science/article/pii/S030881461931115X}, doi = {https://doi.org/10.1016/j.foodchem.2019.125013}, issn = {0308-8146}, year = {2019}, date = {2019-01-01}, journal = {Food Chemistry}, volume = {298}, pages = {125013}, abstract = {The determination of cocoa shell content (Theobroma cacao L.) in cocoa products using a metabolomics approach was accomplished via high performance liquid chromatography quadrupole time-of-flight mass spectrometry (HPLC-QTOF-MS). The developed method was used to separately analyze the polar and non-polar metabolome of the cocoa testa (cocoa shell) and the cocoa cotyledons (cocoa nibs) of cocoa samples from 15 different geographic origins, harvest years, and varieties in positive and negative ion mode. Potential key metabolites were selected which are exclusively contained in the cocoa shell or with significant higher concentration in the cocoa shell than in the cocoa nibs. The pool of potential key metabolites was filtered by established selection criteria, such as temperature stability, fermentations stability, and independence from the geographic origin. Based on these key metabolites an inverse sparse partial least square regression (SPLS) was used for the prediction of the cocoa shell content.}, keywords = {}, pubstate = {published}, tppubtype = {article} } The determination of cocoa shell content (Theobroma cacao L.) 
in cocoa products using a metabolomics approach was accomplished via high performance liquid chromatography quadrupole time-of-flight mass spectrometry (HPLC-QTOF-MS). The developed method was used to separately analyze the polar and non-polar metabolome of the cocoa testa (cocoa shell) and the cocoa cotyledons (cocoa nibs) of cocoa samples from 15 different geographic origins, harvest years, and varieties in positive and negative ion mode. Potential key metabolites were selected which are exclusively contained in the cocoa shell or with significant higher concentration in the cocoa shell than in the cocoa nibs. The pool of potential key metabolites was filtered by established selection criteria, such as temperature stability, fermentations stability, and independence from the geographic origin. Based on these key metabolites an inverse sparse partial least square regression (SPLS) was used for the prediction of the cocoa shell content. |
Licha, David; Vidali, Silvia; Aminzadeh-Gohari, Sepideh; Alka, Oliver; Breitkreuz, Leander; Kohlbacher, Oliver; Reischl, Roland J; Feichtinger, René G; Kofler, Barbara; Huber, Christian G Untargeted Metabolomics Reveals Molecular Effects of Ketogenic Diet on Healthy and Tumor Xenograft Mouse Models International Journal of Molecular Sciences, 20 (16), 2019, ISSN: 1422-0067. @article{ijms20163873, title = {Untargeted Metabolomics Reveals Molecular Effects of Ketogenic Diet on Healthy and Tumor Xenograft Mouse Models}, author = {David Licha and Silvia Vidali and Sepideh Aminzadeh-Gohari and Oliver Alka and Leander Breitkreuz and Oliver Kohlbacher and Roland J Reischl and René G Feichtinger and Barbara Kofler and Christian G Huber}, url = {https://www.mdpi.com/1422-0067/20/16/3873}, doi = {10.3390/ijms20163873}, issn = {1422-0067}, year = {2019}, date = {2019-01-01}, journal = {International Journal of Molecular Sciences}, volume = {20}, number = {16}, abstract = {The application of ketogenic diet (KD) (high fat/low carbohydrate/adequate protein) as an auxiliary cancer therapy is a field of growing attention. KD provides sufficient energy supply for healthy cells, while possibly impairing energy production in highly glycolytic tumor cells. Moreover, KD regulates insulin and tumor related growth factors (like insulin growth factor-1, IGF-1). In order to provide molecular evidence for the proposed additional inhibition of tumor growth when combining chemotherapy with KD, we applied untargeted quantitative metabolome analysis on a spontaneous breast cancer xenograft mouse model, using MDA-MB-468 cells. Healthy mice and mice bearing breast cancer xenografts and receiving cyclophosphamide chemotherapy were compared after treatment with control diet and KD. Metabolomic profiling was performed on plasma samples, applying high-performance liquid chromatography coupled to tandem mass spectrometry. 
Statistical analysis revealed metabolic fingerprints comprising numerous significantly regulated features in the group of mice bearing breast cancer. This fingerprint disappeared after treatment with KD, resulting in recovery to the metabolic status observed in healthy mice receiving control diet. Moreover, amino acid metabolism as well as fatty acid transport were found to be affected by both the tumor and the applied KD. Our results provide clear evidence of a significant molecular effect of adjuvant KD in the context of tumor growth inhibition and suggest additional mechanisms of tumor suppression beyond the proposed constrain in energy supply of tumor cells.}, keywords = {}, pubstate = {published}, tppubtype = {article} } The application of ketogenic diet (KD) (high fat/low carbohydrate/adequate protein) as an auxiliary cancer therapy is a field of growing attention. KD provides sufficient energy supply for healthy cells, while possibly impairing energy production in highly glycolytic tumor cells. Moreover, KD regulates insulin and tumor related growth factors (like insulin growth factor-1, IGF-1). In order to provide molecular evidence for the proposed additional inhibition of tumor growth when combining chemotherapy with KD, we applied untargeted quantitative metabolome analysis on a spontaneous breast cancer xenograft mouse model, using MDA-MB-468 cells. Healthy mice and mice bearing breast cancer xenografts and receiving cyclophosphamide chemotherapy were compared after treatment with control diet and KD. Metabolomic profiling was performed on plasma samples, applying high-performance liquid chromatography coupled to tandem mass spectrometry. Statistical analysis revealed metabolic fingerprints comprising numerous significantly regulated features in the group of mice bearing breast cancer. This fingerprint disappeared after treatment with KD, resulting in recovery to the metabolic status observed in healthy mice receiving control diet. 
Moreover, amino acid metabolism as well as fatty acid transport were found to be affected by both the tumor and the applied KD. Our results provide clear evidence of a significant molecular effect of adjuvant KD in the context of tumor growth inhibition and suggest additional mechanisms of tumor suppression beyond the proposed constraint in energy supply of tumor cells. |
Bichmann, Leon; Nelde, Annika; Ghosh, Michael; Heumos, Lukas; Mohr, Christopher; Peltzer, Alexander; Kuchenbecker, Leon; Sachsenberg, Timo; Walz, Juliane S; Stevanović, Stefan; Rammensee, Hans-Georg; Kohlbacher, Oliver MHCquant: Automated and Reproducible Data Analysis for Immunopeptidomics Journal of Proteome Research, 18 (11), pp. 3876-3884, 2019, (PMID: 31589052). @article{doi:10.1021/acs.jproteome.9b00313, title = {MHCquant: Automated and Reproducible Data Analysis for Immunopeptidomics}, author = {Leon Bichmann and Annika Nelde and Michael Ghosh and Lukas Heumos and Christopher Mohr and Alexander Peltzer and Leon Kuchenbecker and Timo Sachsenberg and Juliane S Walz and Stefan Stevanović and Hans-Georg Rammensee and Oliver Kohlbacher}, url = {https://doi.org/10.1021/acs.jproteome.9b00313}, doi = {10.1021/acs.jproteome.9b00313}, year = {2019}, date = {2019-01-01}, journal = {Journal of Proteome Research}, volume = {18}, number = {11}, pages = {3876-3884}, note = {PMID: 31589052}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
Sürün, Bilge; Schärfe, Charlotta P I; Divine, Mathew R; Heinrich, Julian; Toussaint, Nora C; Zimmermann, Lukas; Beha, Janina; Kohlbacher, Oliver ClinVAP: A reporting strategy from variants to therapeutic options Bioinformatics, 2019, ISSN: 1367-4803, (btz924). @article{10.1093/bioinformatics/btz924, title = {ClinVAP: A reporting strategy from variants to therapeutic options}, author = {Bilge Sürün and Charlotta P I Schärfe and Mathew R Divine and Julian Heinrich and Nora C Toussaint and Lukas Zimmermann and Janina Beha and Oliver Kohlbacher}, url = {https://doi.org/10.1093/bioinformatics/btz924}, doi = {10.1093/bioinformatics/btz924}, issn = {1367-4803}, year = {2019}, date = {2019-01-01}, journal = {Bioinformatics}, abstract = {Next-generation sequencing (NGS) has become routine in oncology and opens up new avenues of therapies, particularly in personalized oncology setting. An increasing number of cases also implies a need for a more robust, automated, and reproducible processing of long lists of variants for cancer diagnosis and therapy. While solutions for the large-scale analysis of somatic variants have been implemented, existing solutions often have issues with reproducibility, scalability, and interoperability.ClinVAP is an automated pipeline which annotates, filters, and prioritizes somatic single nucleotide variants (SNVs) provided in variant call format. It augments the variant information with documented or predicted clinical effect. These annotated variants are prioritized based on driver gene status and druggability. ClinVAP is available as a fully containerized, self-contained pipeline maximizing reproducibility and scalability allowing the analysis of larger scale data. 
The resulting JSON-based report is suited for automated downstream processing, but ClinVAP can also automatically render the information into a user-defined template to yield a human-readable report. ClinVAP is available at https://github.com/PersonalizedOncology/ClinVAP. Supplementary data are available at Bioinformatics online.}, note = {btz924}, keywords = {}, pubstate = {published}, tppubtype = {article} } Next-generation sequencing (NGS) has become routine in oncology and opens up new avenues of therapies, particularly in personalized oncology setting. An increasing number of cases also implies a need for a more robust, automated, and reproducible processing of long lists of variants for cancer diagnosis and therapy. While solutions for the large-scale analysis of somatic variants have been implemented, existing solutions often have issues with reproducibility, scalability, and interoperability. ClinVAP is an automated pipeline which annotates, filters, and prioritizes somatic single nucleotide variants (SNVs) provided in variant call format. It augments the variant information with documented or predicted clinical effect. These annotated variants are prioritized based on driver gene status and druggability. ClinVAP is available as a fully containerized, self-contained pipeline maximizing reproducibility and scalability allowing the analysis of larger scale data. The resulting JSON-based report is suited for automated downstream processing, but ClinVAP can also automatically render the information into a user-defined template to yield a human-readable report. ClinVAP is available at https://github.com/PersonalizedOncology/ClinVAP. Supplementary data are available at Bioinformatics online. |
Lübke, Maren; Spalt, Stefanie; Kowalewski, Daniel J; Zimmermann, Cosima; Bauersfeld, Liane; Nelde, Annika; Bichmann, Leon; Marcu, Ana; Peper, Janet Kerstin; Kohlbacher, Oliver; Walz, Juliane S; Le-Trilling, Vu Thuy Khanh; Hengel, Hartmut; Rammensee, Hans-Georg; Stevanović, Stefan; Halenius, Anne Identification of HCMV-derived T cell epitopes in seropositive individuals through viral deletion models The Journal of Experimental Medicine, 217 (3), 2019, ISSN: 0022-1007, (e20191164). @article{10.1084/jem.20191164, title = {Identification of HCMV-derived T cell epitopes in seropositive individuals through viral deletion models}, author = {Maren Lübke and Stefanie Spalt and Daniel J Kowalewski and Cosima Zimmermann and Liane Bauersfeld and Annika Nelde and Leon Bichmann and Ana Marcu and Janet Kerstin Peper and Oliver Kohlbacher and Juliane S Walz and Vu Thuy Khanh Le-Trilling and Hartmut Hengel and Hans-Georg Rammensee and Stefan Stevanović and Anne Halenius}, url = {https://doi.org/10.1084/jem.20191164}, doi = {10.1084/jem.20191164}, issn = {0022-1007}, year = {2019}, date = {2019-01-01}, journal = {The Journal of Experimental Medicine}, volume = {217}, number = {3}, abstract = {In healthy individuals, immune control of persistent human cytomegalovirus (HCMV) infection is effectively mediated by virus-specific CD4+ and CD8+ T cells. However, identifying the repertoire of T cell specificities for HCMV is hampered by the immense protein coding capacity of this betaherpesvirus. Here, we present a novel approach that employs HCMV deletion mutant viruses lacking HLA class I immunoevasins and allows direct identification of naturally presented HCMV-derived HLA ligands by mass spectrometry. We identified 368 unique HCMV-derived HLA class I ligands representing an unexpectedly broad panel of 123 HCMV antigens. Functional characterization revealed memory T cell responses in seropositive individuals for a substantial proportion (28%) of these novel peptides. 
Multiple HCMV-directed specificities in the memory T cell pool of single individuals indicate that physiologic anti-HCMV T cell responses are directed against a broad range of antigens. Thus, the unbiased identification of naturally presented viral epitopes enabled a comprehensive and systematic assessment of the physiological repertoire of anti-HCMV T cell specificities in seropositive individuals.}, note = {e20191164}, keywords = {}, pubstate = {published}, tppubtype = {article} } In healthy individuals, immune control of persistent human cytomegalovirus (HCMV) infection is effectively mediated by virus-specific CD4+ and CD8+ T cells. However, identifying the repertoire of T cell specificities for HCMV is hampered by the immense protein coding capacity of this betaherpesvirus. Here, we present a novel approach that employs HCMV deletion mutant viruses lacking HLA class I immunoevasins and allows direct identification of naturally presented HCMV-derived HLA ligands by mass spectrometry. We identified 368 unique HCMV-derived HLA class I ligands representing an unexpectedly broad panel of 123 HCMV antigens. Functional characterization revealed memory T cell responses in seropositive individuals for a substantial proportion (28%) of these novel peptides. Multiple HCMV-directed specificities in the memory T cell pool of single individuals indicate that physiologic anti-HCMV T cell responses are directed against a broad range of antigens. Thus, the unbiased identification of naturally presented viral epitopes enabled a comprehensive and systematic assessment of the physiological repertoire of anti-HCMV T cell specificities in seropositive individuals. |
Creydt, Marina; Hudzik, Daria; Rurik, Marc; Kohlbacher, Oliver; Fischer, Markus Food Authentication: Small Molecule Profiling as a Tool for the Geographic Discrimination of German White Asparagus J. Agric. Food Chem., 66 (50), pp. 13328-13339, 2018. @article{AsparagusLCMS-2018, title = {Food Authentication: Small Molecule Profiling as a Tool for the Geographic Discrimination of German White Asparagus}, author = {Marina Creydt and Daria Hudzik and Marc Rurik and Oliver Kohlbacher and Markus Fischer}, url = {https://pubs.acs.org/doi/10.1021/acs.jafc.8b05791}, year = {2018}, date = {2018-11-25}, journal = {J. Agric. Food Chem.}, volume = {66}, number = {50}, pages = {13328-13339}, abstract = {For the first time, a non-targeted metabolomics approach by means of ultra-performance liquid chromatography coupled to electrospray quadruple time-of-flight mass spectrometer (UPLC-qTOF-ESI-MS(/MS)) was chosen for the discrimination of geographical origins of white asparagus samples (Asparagus officinalis). Over a period of four harvesting periods (4 years) approximately 400 asparagus samples were measured. Initially, four different LC-MS methods were used to detect as many metabolites as possible and to assess which method is most suitable. The most relevant marker compounds were linked to the influence of different plant stress parameters and climate effects. Some of the samples were also analyzed by isotope-ratio mass spectrometry (IRMS) which is the current gold standard for the discrimination of the geographical origin of asparagus. 
In summary, the analysis of the metabolome was proved to be quite suitable to determine the geographical origin of asparagus and seems to provide better interpretable results than IRMS studies.}, keywords = {}, pubstate = {published}, tppubtype = {article} } For the first time, a non-targeted metabolomics approach by means of ultra-performance liquid chromatography coupled to electrospray quadrupole time-of-flight mass spectrometer (UPLC-qTOF-ESI-MS(/MS)) was chosen for the discrimination of geographical origins of white asparagus samples (Asparagus officinalis). Over a period of four harvesting periods (4 years) approximately 400 asparagus samples were measured. Initially, four different LC-MS methods were used to detect as many metabolites as possible and to assess which method is most suitable. The most relevant marker compounds were linked to the influence of different plant stress parameters and climate effects. Some of the samples were also analyzed by isotope-ratio mass spectrometry (IRMS) which is the current gold standard for the discrimination of the geographical origin of asparagus. In summary, the analysis of the metabolome was proved to be quite suitable to determine the geographical origin of asparagus and seems to provide better interpretable results than IRMS studies. |
Rehm, Markus; Apweiler, Rolf; Beissbarth, Tim; Berthold, Michael; Blüthgen, Nils; Burmeister, Yvonne; Dammann, Olaf; Deutsch, Andreas; Feuerhake, Friederike; Franke, Andre; Hasenauer, Jan; Hoffmann, Steve; Höfer, Thomas; Jansen, Peter; Kaderali, Lars; Klingmüller, Ursula; Koch, Ina; Kohlbacher, Oliver; Kuepfer, Lars; Lammert, Frank; Maier, Dieter; Pfeifer, Nico; Radde, Nicole; Roeder, Ingo; Saez-Rodriguez, Julio; Sax, Ulrich; Schmeck, Bernd; Schuppert, Andreas; Seilheimer, Bernd; Theis, Fabian; Vera-Gonzáles, Julio; Wolkenhauer, Olaf Whither Systems Medicine? Exp. Mol. Med., 50 (3), pp. e453, 2018. @article{WhitherSysMed2017, title = {Whither Systems Medicine?}, author = {Markus Rehm and Rolf Apweiler and Tim Beissbarth and Michael Berthold and Nils Blüthgen and Yvonne Burmeister and Olaf Dammann and Andreas Deutsch and Friederike Feuerhake and Andre Franke and Jan Hasenauer and Steve Hoffmann and Thomas Höfer and Peter Jansen and Lars Kaderali and Ursula Klingmüller and Ina Koch and Oliver Kohlbacher and Lars Kuepfer and Frank Lammert and Dieter Maier and Nico Pfeifer and Nicole Radde and Ingo Roeder and Julio Saez-Rodriguez and Ulrich Sax and Bernd Schmeck and Andreas Schuppert and Bernd Seilheimer and Fabian Theis and Julio Vera-Gonzáles and Olaf Wolkenhauer}, url = {https://www.nature.com/articles/emm2017290}, year = {2018}, date = {2018-01-01}, journal = {Exp. Mol. Med.}, volume = {50}, number = {3}, pages = {e453}, abstract = {ew technologies to generate, store and retrieve medical and research data are inducing a rapid change in clinical and translational research and health care. Systems medicine is the interdisciplinary approach wherein physicians and clinical investigators team up with experts from biology, biostatistics, informatics, mathematics and computational modeling to develop methods to use new and stored data to the benefit of the patient. 
We here provide a critical assessment of the opportunities and challenges arising out of systems approaches in medicine and from this provide a definition of what systems medicine entails. Based on our analysis of current developments in medicine and healthcare and associated research needs, we emphasize the role of systems medicine as a multilevel and multidisciplinary methodological framework for informed data acquisition and interdisciplinary data analysis to extract previously inaccessible knowledge for the benefit of patients.}, keywords = {}, pubstate = {published}, tppubtype = {article} } New technologies to generate, store and retrieve medical and research data are inducing a rapid change in clinical and translational research and health care. Systems medicine is the interdisciplinary approach wherein physicians and clinical investigators team up with experts from biology, biostatistics, informatics, mathematics and computational modeling to develop methods to use new and stored data to the benefit of the patient. We here provide a critical assessment of the opportunities and challenges arising out of systems approaches in medicine and from this provide a definition of what systems medicine entails. Based on our analysis of current developments in medicine and healthcare and associated research needs, we emphasize the role of systems medicine as a multilevel and multidisciplinary methodological framework for informed data acquisition and interdisciplinary data analysis to extract previously inaccessible knowledge for the benefit of patients. |
Schubert, Benjamin; Schärfe, Charlotta; Dönnes, Pierre; Hopf, Thomas; Marks, Debora; Kohlbacher, Oliver Population-specific design of de-immunized protein biotherapeutics PLoS Comput Biol, 14 (3), pp. (3):e1005983, 2018. @article{DeImmunoPLoSCB2018, title = {Population-specific design of de-immunized protein biotherapeutics}, author = {Benjamin Schubert and Charlotta Schärfe and Pierre Dönnes and Thomas Hopf and Debora Marks and Oliver Kohlbacher}, url = {https://dx.doi.org/10.1371%2Fjournal.pcbi.1005983}, year = {2018}, date = {2018-01-01}, journal = {PLoS Comput Biol}, volume = {14}, number = {3}, pages = {(3):e1005983}, abstract = {Immunogenicity is a major problem during the development of biotherapeutics since it can lead to rapid clearance of the drug and adverse reactions. The challenge for biotherapeutic design is therefore to identify mutants of the protein sequence that minimize immunogenicity in a target population whilst retaining pharmaceutical activity and protein function. Current approaches are moderately successful in designing sequences with reduced immunogenicity, but do not account for the varying frequencies of different human leucocyte antigen alleles in a specific population and in addition, since many designs are non-functional, require costly experimental post-screening. Here, we report a new method for de-immunization design using multi-objective combinatorial optimization. The method simultaneously optimizes the likelihood of a functional protein sequence at the same time as minimizing its immunogenicity tailored to a target population. We bypass the need for three-dimensional protein structure or molecular simulations to identify functional designs by automatically generating sequences using probabilistic models that have been used previously for mutation effect prediction and structure prediction. 
As proof-of-principle we designed sequences of the C2 domain of Factor VIII and tested them experimentally, resulting in a good correlation with the predicted immunogenicity of our model.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Immunogenicity is a major problem during the development of biotherapeutics since it can lead to rapid clearance of the drug and adverse reactions. The challenge for biotherapeutic design is therefore to identify mutants of the protein sequence that minimize immunogenicity in a target population whilst retaining pharmaceutical activity and protein function. Current approaches are moderately successful in designing sequences with reduced immunogenicity, but do not account for the varying frequencies of different human leucocyte antigen alleles in a specific population and in addition, since many designs are non-functional, require costly experimental post-screening. Here, we report a new method for de-immunization design using multi-objective combinatorial optimization. The method simultaneously optimizes the likelihood of a functional protein sequence at the same time as minimizing its immunogenicity tailored to a target population. We bypass the need for three-dimensional protein structure or molecular simulations to identify functional designs by automatically generating sequences using probabilistic models that have been used previously for mutation effect prediction and structure prediction. As proof-of-principle we designed sequences of the C2 domain of Factor VIII and tested them experimentally, resulting in a good correlation with the predicted immunogenicity of our model. |
Mohr, Christopher; Friedrich, Andreas; Wojnar, David; Kenar, Erhan; Polatkan, Aydin-Can; Codrea, Marius Cosmin; Czemmel, Stefan; Kohlbacher, Oliver; Nahnsen, Sven qPortal: A platform for data-driven biomedical research PLoS ONE, 13 (1), pp. e0191603, 2018. @article{qPortal-PLoSONE-2018, title = {qPortal: A platform for data-driven biomedical research}, author = {Christopher Mohr and Andreas Friedrich and David Wojnar and Erhan Kenar and Aydin-Can Polatkan and Marius Cosmin Codrea and Stefan Czemmel and Oliver Kohlbacher and Sven Nahnsen}, url = {http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0191603}, year = {2018}, date = {2018-01-01}, journal = {PLoS ONE}, volume = {13}, number = {1}, pages = {e0191603}, abstract = {Modern biomedical research aims at drawing biological conclusions from large, highly complex biological datasets. It has become common practice to make extensive use of high-throughput technologies that produce big amounts of heterogeneous data. In addition to the ever-improving accuracy, methods are getting faster and cheaper, resulting in a steadily increasing need for scalable data management and easily accessible means of analysis. We present qPortal, a platform providing users with an intuitive way to manage and analyze quantitative biological data. The backend leverages a variety of concepts and technologies, such as relational databases, data stores, data models and means of data transfer, as well as front-end solutions to give users access to data management and easy-to-use analysis options. Users are empowered to conduct their experiments from the experimental design to the visualization of their results through the platform. Here, we illustrate the feature-rich portal by simulating a biomedical study based on publically available data. We demonstrate the software’s strength in supporting the entire project life cycle. 
The software supports the project design and registration, empowers users to do all-digital project management and finally provides means to perform analysis. We compare our approach to Galaxy, one of the most widely used scientific workflow and analysis platforms in computational biology. Application of both systems to a small case study shows the differences between a data-driven approach (qPortal) and a workflow-driven approach (Galaxy). qPortal, a one-stop-shop solution for biomedical projects offers up-to-date analysis pipelines, quality control workflows, and visualization tools. Through intensive user interactions, appropriate data models have been developed. These models build the foundation of our biological data management system and provide possibilities to annotate data, query metadata for statistics and future re-analysis on high-performance computing systems via coupling of workflow management systems. Integration of project and data management as well as workflow resources in one place present clear advantages over existing solutions.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Modern biomedical research aims at drawing biological conclusions from large, highly complex biological datasets. It has become common practice to make extensive use of high-throughput technologies that produce big amounts of heterogeneous data. In addition to the ever-improving accuracy, methods are getting faster and cheaper, resulting in a steadily increasing need for scalable data management and easily accessible means of analysis. We present qPortal, a platform providing users with an intuitive way to manage and analyze quantitative biological data. The backend leverages a variety of concepts and technologies, such as relational databases, data stores, data models and means of data transfer, as well as front-end solutions to give users access to data management and easy-to-use analysis options. 
Users are empowered to conduct their experiments from the experimental design to the visualization of their results through the platform. Here, we illustrate the feature-rich portal by simulating a biomedical study based on publically available data. We demonstrate the software’s strength in supporting the entire project life cycle. The software supports the project design and registration, empowers users to do all-digital project management and finally provides means to perform analysis. We compare our approach to Galaxy, one of the most widely used scientific workflow and analysis platforms in computational biology. Application of both systems to a small case study shows the differences between a data-driven approach (qPortal) and a workflow-driven approach (Galaxy). qPortal, a one-stop-shop solution for biomedical projects offers up-to-date analysis pipelines, quality control workflows, and visualization tools. Through intensive user interactions, appropriate data models have been developed. These models build the foundation of our biological data management system and provide possibilities to annotate data, query metadata for statistics and future re-analysis on high-performance computing systems via coupling of workflow management systems. Integration of project and data management as well as workflow resources in one place present clear advantages over existing solutions. |
Choobdar, Sarvenaz; Ahsen, Mehmet E; Crawford, Jake; Tomasoni, Mattia; Lamparter, David; Lin, Junyuan; Hescott, Benjamin; Hu, Xiaozhe; Mercer, Johnathan; Natoli, Ted; Narayan, Rajiv; Subramanian, Aravind; Stolovitzky, Gustavo; Kutalik, Zoltán; Lage, Kasper; Slonim, Donna K; Saez-Rodriguez, Julio; Cowen, Lenore J; Bergmann, Sven; Marbach, Daniel Open Community Challenge Reveals Molecular Network Modules with Key Roles in Diseases bioRxiv, 2018. @article{Choobdar265553, title = {Open Community Challenge Reveals Molecular Network Modules with Key Roles in Diseases}, author = {Sarvenaz Choobdar and Mehmet E Ahsen and Jake Crawford and Mattia Tomasoni and David Lamparter and Junyuan Lin and Benjamin Hescott and Xiaozhe Hu and Johnathan Mercer and Ted Natoli and Rajiv Narayan and Aravind Subramanian and Gustavo Stolovitzky and Zoltán Kutalik and Kasper Lage and Donna K Slonim and Julio Saez-Rodriguez and Lenore J Cowen and Sven Bergmann and Daniel Marbach}, url = {https://www.biorxiv.org/content/early/2018/02/15/265553}, year = {2018}, date = {2018-01-01}, journal = {bioRxiv}, abstract = {Identification of modules in molecular networks is at the core of many current analysis methods in biomedical research. However, how well different approaches identify disease-relevant modules in different types of networks remains poorly understood. We launched the "Disease Module Identification DREAM Challenge", an open competition to comprehensively assess module identification methods across diverse gene, protein and signaling networks. Predicted network modules were tested for association with complex traits and diseases using a unique collection of 180 genome-wide association studies (GWAS). While a number of approaches were successful in terms of discovering complementary trait-associated modules, consensus predictions derived from the challenge submissions performed best. 
We find that most of these modules correspond to core disease-relevant pathways, which often comprise therapeutic targets and correctly prioritize candidate disease genes. This community challenge establishes benchmarks, tools and guidelines for molecular network analysis to study human disease biology (https://synapse.org/modulechallenge).}, keywords = {}, pubstate = {published}, tppubtype = {article} } Identification of modules in molecular networks is at the core of many current analysis methods in biomedical research. However, how well different approaches identify disease-relevant modules in different types of networks remains poorly understood. We launched the "Disease Module Identification DREAM Challenge", an open competition to comprehensively assess module identification methods across diverse gene, protein and signaling networks. Predicted network modules were tested for association with complex traits and diseases using a unique collection of 180 genome-wide association studies (GWAS). While a number of approaches were successful in terms of discovering complementary trait-associated modules, consensus predictions derived from the challenge submissions performed best. We find that most of these modules correspond to core disease-relevant pathways, which often comprise therapeutic targets and correctly prioritize candidate disease genes. This community challenge establishes benchmarks, tools and guidelines for molecular network analysis to study human disease biology (https://synapse.org/modulechallenge). |
Brunk, Elizabeth; Sahoo, Swagatika; Zielinski, Daniel C; Altunkaya, Ali; Dräger, Andreas; Mih, Nathan; Gatto, Francesco; Nilsson, Avlant; Gonzalez, German Andres Preciat; Aurich, Maike Kathrin; Prlić, Andreas; Sastry, Anand; Danielsdottir, Anna D; Heinken, Almut; Noronha, Alberto; Rose, Peter W; Burley, Stephen K; Fleming, Ronan M T; Nielsen, Jens; Thiele, Ines; Palsson, Bernhard O Recon3D enables a three-dimensional view of gene variation in human metabolism Nature Biotechnology, 2018. @article{articlereference.2018-02-21.3019096605, title = {Recon3D enables a three-dimensional view of gene variation in human metabolism}, author = {Elizabeth Brunk and Swagatika Sahoo and Daniel C Zielinski and Ali Altunkaya and Andreas Dräger and Nathan Mih and Francesco Gatto and Avlant Nilsson and German Andres Preciat Gonzalez and Maike Kathrin Aurich and Andreas Prlić and Anand Sastry and Anna D Danielsdottir and Almut Heinken and Alberto Noronha and Peter W Rose and Stephen K Burley and Ronan M T Fleming and Jens Nielsen and Ines Thiele and Bernhard O Palsson}, url = {https://www.nature.com/articles/nbt.4072}, year = {2018}, date = {2018-01-01}, journal = {Nature Biotechnology}, abstract = {Genome-scale network reconstructions have helped uncover the molecular basis of metabolism. Here we present Recon3D, a computational resource that includes three-dimensional (3D) metabolite and protein structure data and enables integrated analyses of metabolic functions in humans. We use Recon3D to functionally characterize mutations associated with disease, and identify metabolic response signatures that are caused by exposure to certain drugs. Recon3D represents the most comprehensive human metabolic network model to date, accounting for 3,288 open reading frames (representing 17% of functionally annotated human genes), 13,543 metabolic reactions involving 4,140 unique metabolites, and 12,890 protein structures. 
These data provide a unique resource for investigating molecular mechanisms of human metabolism. Recon3D is available at http://vmh.life .}, keywords = {}, pubstate = {published}, tppubtype = {article} } Genome-scale network reconstructions have helped uncover the molecular basis of metabolism. Here we present Recon3D, a computational resource that includes three-dimensional (3D) metabolite and protein structure data and enables integrated analyses of metabolic functions in humans. We use Recon3D to functionally characterize mutations associated with disease, and identify metabolic response signatures that are caused by exposure to certain drugs. Recon3D represents the most comprehensive human metabolic network model to date, accounting for 3,288 open reading frames (representing 17% of functionally annotated human genes), 13,543 metabolic reactions involving 4,140 unique metabolites, and 12,890 protein structures. These data provide a unique resource for investigating molecular mechanisms of human metabolism. Recon3D is available at http://vmh.life . |
Thijssen, Bram; Dijkstra, Tjeerd M H; Heskes, Tom; Wessels, Lodewyk F A Bayesian data integration for quantifying the contribution of diverse measurements to parameter estimates Bioinformatics, 34 (5), pp. 803-811, 2018. @article{articlereference.2018-03-06.7162030647, title = {Bayesian data integration for quantifying the contribution of diverse measurements to parameter estimates}, author = {Bram Thijssen and Tjeerd M H Dijkstra and Tom Heskes and Lodewyk F A Wessels}, url = {https://academic.oup.com/bioinformatics/article/34/5/803/4563568}, year = {2018}, date = {2018-01-01}, journal = {Bioinformatics}, volume = {34}, number = {5}, pages = {803-811}, abstract = {Motivation: Computational models in biology are frequently underdetermined, due to limits in our capacity to measure biological systems. In particular, mechanistic models often contain parameters whose values are not constrained by a single type of measurement. It may be possible to achieve better model determination by combining the information contained in different types of measurements. Bayesian statistics provides a convenient framework for this, allowing a quantification of the reduction in uncertainty with each additional measurement type. We wished to explore whether such integration is feasible and whether it can allow computational models to be more accurately determined. Results: We created an ordinary differential equation model of cell cycle regulation in budding yeast and integrated data from 13 different studies covering different experimental techniques. We found that for some parameters, a single type of measurement, relative time course mRNA expression, is sufficient to constrain them. Other parameters, however, were only constrained when two types of measurements were combined, namely relative time course and absolute transcript concentration. 
Comparing the estimates to measurements from three additional, independent studies, we found that the degradation and transcription rates indeed matched the model predictions in order of magnitude. The predicted translation rate was incorrect however, thus revealing a deficiency in the model. Since this parameter was not constrained by any of the measurement types separately, it was only possible to falsify the model when integrating multiple types of measurements. In conclusion, this study shows that integrating multiple measurement types can allow models to be more accurately determined. Availability and implementation: The models and files required for running the inference are included in the Supplementary information.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Motivation: Computational models in biology are frequently underdetermined, due to limits in our capacity to measure biological systems. In particular, mechanistic models often contain parameters whose values are not constrained by a single type of measurement. It may be possible to achieve better model determination by combining the information contained in different types of measurements. Bayesian statistics provides a convenient framework for this, allowing a quantification of the reduction in uncertainty with each additional measurement type. We wished to explore whether such integration is feasible and whether it can allow computational models to be more accurately determined. Results: We created an ordinary differential equation model of cell cycle regulation in budding yeast and integrated data from 13 different studies covering different experimental techniques. We found that for some parameters, a single type of measurement, relative time course mRNA expression, is sufficient to constrain them. Other parameters, however, were only constrained when two types of measurements were combined, namely relative time course and absolute transcript concentration. 
Comparing the estimates to measurements from three additional, independent studies, we found that the degradation and transcription rates indeed matched the model predictions in order of magnitude. The predicted translation rate was incorrect however, thus revealing a deficiency in the model. Since this parameter was not constrained by any of the measurement types separately, it was only possible to falsify the model when integrating multiple types of measurements. In conclusion, this study shows that integrating multiple measurement types can allow models to be more accurately determined. Availability and implementation: The models and files required for running the inference are included in the Supplementary information. |
Lederer, Simone; Dijkstra, Tjeerd M H; Heskes, Tom Additive Dose Response Models: Explicit Formulation and the Loewe Additivity Consistency Condition Frontiers in Pharmacology, 9 (31), 2018. @article{articlereference.2018-03-06.2759999135, title = {Additive Dose Response Models: Explicit Formulation and the Loewe Additivity Consistency Condition}, author = {Simone Lederer and Tjeerd M H Dijkstra and Tom Heskes}, url = {https://www.frontiersin.org/articles/10.3389/fphar.2018.00031/full}, year = {2018}, date = {2018-01-01}, journal = {Frontiers in Pharmacology}, volume = {9}, number = {31}, abstract = {High-throughput techniques allow for massive screening of drug combinations. To find combinations that exhibit an interaction effect, one filters for promising compound combinations by comparing to a response without interaction. A common principle for no interaction is Loewe Additivity which is based on the assumption that no compound interacts with itself and that two doses from different compounds having the same effect are equivalent. It then should not matter whether a component is replaced by the other or vice versa. We call this assumption the Loewe Additivity Consistency Condition (LACC). We derive explicit and implicit null reference models from the Loewe Additivity principle that are equivalent when the LACC holds. Of these two formulations, the implicit formulation is the known General Isobole Equation (Loewe, 1928), whereas the explicit one is the novel contribution. The LACC is violated in a significant number of cases. In this scenario the models make different predictions. We analyze two data sets of drug screening that are non-interactive (Cokol et al., 2011; Yadav et al., 2015) and show that the LACC is mostly violated and Loewe Additivity not defined. Further, we compare the measurements of the non-interactive cases of both data sets to the theoretical null reference models in terms of bias and mean squared error. 
We demonstrate that the explicit formulation of the null reference model leads to smaller mean squared errors than the implicit one and is much faster to compute.}, keywords = {}, pubstate = {published}, tppubtype = {article} } High-throughput techniques allow for massive screening of drug combinations. To find combinations that exhibit an interaction effect, one filters for promising compound combinations by comparing to a response without interaction. A common principle for no interaction is Loewe Additivity which is based on the assumption that no compound interacts with itself and that two doses from different compounds having the same effect are equivalent. It then should not matter whether a component is replaced by the other or vice versa. We call this assumption the Loewe Additivity Consistency Condition (LACC). We derive explicit and implicit null reference models from the Loewe Additivity principle that are equivalent when the LACC holds. Of these two formulations, the implicit formulation is the known General Isobole Equation (Loewe, 1928), whereas the explicit one is the novel contribution. The LACC is violated in a significant number of cases. In this scenario the models make different predictions. We analyze two data sets of drug screening that are non-interactive (Cokol et al., 2011; Yadav et al., 2015) and show that the LACC is mostly violated and Loewe Additivity not defined. Further, we compare the measurements of the non-interactive cases of both data sets to the theoretical null reference models in terms of bias and mean squared error. We demonstrate that the explicit formulation of the null reference model leads to smaller mean squared errors than the implicit one and is much faster to compute. |
Löffler, Markus W; Kowalewski, Daniel J; Backert, Linus; Bernhardt, Jörg; Adam, Patrick; Schuster, Heiko; Dengler, Florian; Backes, Daniel; Kopp, Hans-Georg; Beckert, Stefan; Wagner, Silvia; Königsrainer, Ingmar; Kohlbacher, Oliver; Kanz, Lothar; Königsrainer, Alfred; Rammensee, Hans-Georg; Stevanovic, Stefan; Haen, Sebastian P Mapping the HLA ligandome of Colorectal Cancer Reveals an Imprint of Malignant Cell Transformation Cancer Res., 78 (16), pp. 4627-4641, 2018. @article{LoefflerCanRes2018, title = {Mapping the HLA ligandome of Colorectal Cancer Reveals an Imprint of Malignant Cell Transformation}, author = {Markus W Löffler and Daniel J Kowalewski and Linus Backert and Jörg Bernhardt and Patrick Adam and Heiko Schuster and Florian Dengler and Daniel Backes and Hans-Georg Kopp and Stefan Beckert and Silvia Wagner and Ingmar Königsrainer and Oliver Kohlbacher and Lothar Kanz and Alfred Königsrainer and Hans-Georg Rammensee and Stefan Stevanovic and Sebastian P Haen}, url = {https://doi.org/10.1158/0008-5472.CAN-17-1745}, year = {2018}, date = {2018-01-01}, journal = {Cancer Res.}, volume = {78}, number = {16}, pages = {4627-4641}, abstract = {Immune cell infiltrates have proven highly relevant for colorectal carcinoma (CRC) prognosis, making CRC a promising candidate for immunotherapy. Since tumors interact with the immune system via HLA-presented peptide ligands, exact knowledge of the peptidome constitution is fundamental for understanding this relationship. Here we comprehensively describe the naturally presented HLA-ligandome of CRC and corresponding non-malignant colon (NMC) tissue. Mass spectrometry identified 35,367 and 28,132 HLA-class I ligands on CRC and NMC, attributable to 7,684 and 6,312 distinct source proteins, respectively. 
Cancer-exclusive peptides were assessed on source protein level using Kyoto Encyclopedia of Genes and Genomes (KEGG) and protein analysis through evolutionary relationships (PANTHER), revealing pathognomonic CRC-associated pathways including Wnt, TGF-β, PI3K, p53, and RTK-RAS. Relative quantitation of peptide presentation on paired CRC and NMC tissue further identified source proteins from cancer- and infection-associated pathways to be over-represented merely within the CRC ligandome. From the pool of tumor-exclusive peptides, a selected HLA-ligand subset was assessed for immunogenicity, with the majority exhibiting an existing T cell repertoire. Overall, these data show that the HLA-ligandome reflects cancer-associated pathways implicated in CRC oncogenesis, suggesting that alterations in tumor cell metabolism could result in cancer-specific, albeit not mutation-derived tumor-antigens. Hence, a defined pool of unique tumor peptides, attributable to complex cellular alterations that are exclusive to malignant cells might comprise promising candidates for immunotherapeutic applications.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Immune cell infiltrates have proven highly relevant for colorectal carcinoma (CRC) prognosis, making CRC a promising candidate for immunotherapy. Since tumors interact with the immune system via HLA-presented peptide ligands, exact knowledge of the peptidome constitution is fundamental for understanding this relationship. Here we comprehensively describe the naturally presented HLA-ligandome of CRC and corresponding non-malignant colon (NMC) tissue. Mass spectrometry identified 35,367 and 28,132 HLA-class I ligands on CRC and NMC, attributable to 7,684 and 6,312 distinct source proteins, respectively. 
Cancer-exclusive peptides were assessed on source protein level using Kyoto Encyclopedia of Genes and Genomes (KEGG) and protein analysis through evolutionary relationships (PANTHER), revealing pathognomonic CRC-associated pathways including Wnt, TGF-β, PI3K, p53, and RTK-RAS. Relative quantitation of peptide presentation on paired CRC and NMC tissue further identified source proteins from cancer- and infection-associated pathways to be over-represented merely within the CRC ligandome. From the pool of tumor-exclusive peptides, a selected HLA-ligand subset was assessed for immunogenicity, with the majority exhibiting an existing T cell repertoire. Overall, these data show that the HLA-ligandome reflects cancer-associated pathways implicated in CRC oncogenesis, suggesting that alterations in tumor cell metabolism could result in cancer-specific, albeit not mutation-derived tumor-antigens. Hence, a defined pool of unique tumor peptides, attributable to complex cellular alterations that are exclusive to malignant cells might comprise promising candidates for immunotherapeutic applications. |
Kahles, André; Lehmann, Kjong-Van; Toussaint, Nora C; Hüser, Matthias; Stark, Stefan; Sachsenberg, Timo; Stegle, Oliver; Kohlbacher, Oliver; Sander, Chris; TCGA PanCanAtlas Network; Rätsch, Gunnar Comprehensive Analysis of Alternative Splicing Across Tumors from 8,705 Patients Cancer Cell, 34 (2), pp. 211-224, 2018. @article{SplicedEpitopesCancerCell2018, title = {Comprehensive Analysis of Alternative Splicing Across Tumors from 8,705 Patients}, author = {André Kahles and Kjong-Van Lehmann and Nora C Toussaint and Matthias Hüser and Stefan Stark and Timo Sachsenberg and Oliver Stegle and Oliver Kohlbacher and Chris Sander and {TCGA PanCanAtlas Network} and Gunnar Rätsch}, url = {https://www.sciencedirect.com/science/article/pii/S1535610818303064?via%3Dihub}, year = {2018}, date = {2018-01-01}, journal = {Cancer Cell}, volume = {34}, number = {2}, pages = {211-224}, abstract = {Our comprehensive analysis of alternative splicing across 32 The Cancer Genome Atlas cancer types from 8,705 patients detects alternative splicing events and tumor variants by reanalyzing RNA and whole-exome sequencing data. Tumors have up to 30% more alternative splicing events than normal samples. Association analysis of somatic variants with alternative splicing events confirmed known trans associations with variants in SF3B1 and U2AF1 and identified additional trans-acting variants (e.g., TADA1, PPP2R1A). Many tumors have thousands of alternative splicing events not detectable in normal samples; on average, we identified ≈930 exon-exon junctions (“neojunctions”) in tumors not typically found in GTEx normals. 
From Clinical Proteomic Tumor Analysis Consortium data available for breast and ovarian tumor samples, we confirmed ≈1.7 neojunction- and ≈0.6 single nucleotide variant-derived peptides per tumor sample that are also predicted major histocompatibility complex-I binders (“putative neoantigens”).}, keywords = {}, pubstate = {published}, tppubtype = {article} } Our comprehensive analysis of alternative splicing across 32 The Cancer Genome Atlas cancer types from 8,705 patients detects alternative splicing events and tumor variants by reanalyzing RNA and whole-exome sequencing data. Tumors have up to 30% more alternative splicing events than normal samples. Association analysis of somatic variants with alternative splicing events confirmed known trans associations with variants in SF3B1 and U2AF1 and identified additional trans-acting variants (e.g., TADA1, PPP2R1A). Many tumors have thousands of alternative splicing events not detectable in normal samples; on average, we identified ≈930 exon-exon junctions (“neojunctions”) in tumors not typically found in GTEx normals. From Clinical Proteomic Tumor Analysis Consortium data available for breast and ovarian tumor samples, we confirmed ≈1.7 neojunction- and ≈0.6 single nucleotide variant-derived peptides per tumor sample that are also predicted major histocompatibility complex-I binders (“putative neoantigens”). |
Fröhlich, Holger; Balling, Rudi; Beerenwinkel, Niko; Kohlbacher, Oliver; Kumar, Santosh; Lengauer, Thomas; Maathuis, Marloes; Moreau, Yves; Murphy, Susan; Przytycka, Teresa; Röst, Hannes; Rebhan, Michael; Schuppert, Andreas; Schwab, Matthias; Spang, Rainer; Stekhoven, Daniel; Sun, Jimeng; Ziemek, Daniel; Zupan, Blaz From Hype to Reality: Data Science enabling Personalized Medicine BMC Medicine, 16 , pp. 150, 2018. @article{PM_Dagstuhl_2018, title = {From Hype to Reality: Data Science enabling Personalized Medicine}, author = {Holger Fröhlich and Rudi Balling and Niko Beerenwinkel and Oliver Kohlbacher and Santosh Kumar and Thomas Lengauer and Marloes Maathuis and Yves Moreau and Susan Murphy and Teresa Przytycka and Hannes Röst and Michael Rebhan and Andreas Schuppert and Matthias Schwab and Rainer Spang and Daniel Stekhoven and Jimeng Sun and Daniel Ziemek and Blaz Zupan}, url = {https://bmcmedicine.biomedcentral.com/articles/10.1186/s12916-018-1122-7}, year = {2018}, date = {2018-01-01}, journal = {BMC Medicine}, volume = {16}, pages = {150}, abstract = {Background Personalized, precision, P4, or stratified medicine is understood as a medical approach in which patients are stratified based on their disease subtype, risk, prognosis, or treatment response using specialized diagnostic tests. The key idea is to base medical decisions on individual patient characteristics, including molecular and behavioral biomarkers, rather than on population averages. Personalized medicine is deeply connected to and dependent on data science, specifically machine learning (often named Artificial Intelligence in the mainstream media). While during recent years there has been a lot of enthusiasm about the potential of ‘big data’ and machine learning-based solutions, there exist only few examples that impact current clinical practice. 
The lack of impact on clinical practice can largely be attributed to insufficient performance of predictive models, difficulties to interpret complex model predictions, and lack of validation via prospective clinical trials that demonstrate a clear benefit compared to the standard of care. In this paper, we review the potential of state-of-the-art data science approaches for personalized medicine, discuss open challenges, and highlight directions that may help to overcome them in the future. Conclusions There is a need for an interdisciplinary effort, including data scientists, physicians, patient advocates, regulatory agencies, and health insurance organizations. Partially unrealistic expectations and concerns about data science-based solutions need to be better managed. In parallel, computational methods must advance more to provide direct benefit to clinical practice.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Background Personalized, precision, P4, or stratified medicine is understood as a medical approach in which patients are stratified based on their disease subtype, risk, prognosis, or treatment response using specialized diagnostic tests. The key idea is to base medical decisions on individual patient characteristics, including molecular and behavioral biomarkers, rather than on population averages. Personalized medicine is deeply connected to and dependent on data science, specifically machine learning (often named Artificial Intelligence in the mainstream media). While during recent years there has been a lot of enthusiasm about the potential of ‘big data’ and machine learning-based solutions, there exist only few examples that impact current clinical practice. 
The lack of impact on clinical practice can largely be attributed to insufficient performance of predictive models, difficulties to interpret complex model predictions, and lack of validation via prospective clinical trials that demonstrate a clear benefit compared to the standard of care. In this paper, we review the potential of state-of-the-art data science approaches for personalized medicine, discuss open challenges, and highlight directions that may help to overcome them in the future. Conclusions There is a need for an interdisciplinary effort, including data scientists, physicians, patient advocates, regulatory agencies, and health insurance organizations. Partially unrealistic expectations and concerns about data science-based solutions need to be better managed. In parallel, computational methods must advance more to provide direct benefit to clinical practice. |
Prasser, Fabian; Kohlbacher, Oliver; Mansmann, Ulrich; Bauer, Bernhard; Kuhn, Klaus A Data Integration for Future Medicine (DIFUTURE) - An Architectural and Methodological Overview Methods Inf Med, 57 (S01), pp. e57-e65, 2018. @article{DIFUTURE_Methods_2018, title = {Data Integration for Future Medicine (DIFUTURE) - An Architectural and Methodological Overview}, author = {Fabian Prasser and Oliver Kohlbacher and Ulrich Mansmann and Bernhard Bauer and Klaus A Kuhn}, url = {https://www.thieme-connect.de/products/ejournals/abstract/10.3414/ME17-02-0022}, year = {2018}, date = {2018-01-01}, journal = {Methods Inf Med}, volume = {57}, number = {S01}, pages = {e57-e65}, abstract = {Introduction: This article is part of the Focus Theme of Methods of Information in Medicine on the German Medical Informatics Initiative. Future medicine will be predictive, preventive, personalized, participatory and digital. Data and knowledge at comprehensive depth and breadth need to be available for research and at the point of care as a basis for targeted diagnosis and therapy. Data integration and data sharing will be essential to achieve these goals. For this purpose, the consortium Data Integration for Future Medicine (DIFUTURE) will establish Data Integration Centers (DICs) at university medical centers. Objectives: The infrastructure envisioned by DIFUTURE will provide researchers with cross-site access to data and support physicians by innovative views on integrated data as well as by decision support components for personalized treatments. The aim of our use cases is to show that this accelerates innovation, improves health care processes and results in tangible benefits for our patients. To realize our vision, numerous challenges have to be addressed. The objective of this article is to describe our concepts and solutions on the technical and the organizational level with a specific focus on data integration and sharing. 
Governance and Policies: Data sharing implies significant security and privacy challenges. Therefore, state-of-the-art data protection, modern IT security concepts and patient trust play a central role in our approach. We have established governance structures and policies safeguarding data use and sharing by technical and organizational measures providing highest levels of data protection. One of our central policies is that adequate methods of data sharing for each use case and project will be selected based on rigorous risk and threat analyses. Interdisciplinary groups have been installed in order to manage change. Architectural Framework and Methodology: The DIFUTURE Data Integration Centers will implement a three-step approach to integrating, harmonizing and sharing structured, unstructured and omics data as well as images from clinical and research environments. First, data is imported and technically harmonized using common data and interface standards (including various IHE profiles, DICOM and HL7 FHIR). Second, data is preprocessed, transformed, harmonized and enriched within a staging and working environment. Third, data is imported into common analytics platforms and data models (including i2b2 and tranSMART) and made accessible in a form compliant with the interoperability requirements defined on the national level. Secure data access and sharing will be implemented with innovative combinations of privacy-enhancing technologies (safe data, safe settings, safe outputs) and methods of distributed computing. Use Cases: From the perspective of health care and medical research, our approach is disease-oriented and use-case driven, i.e. following the needs of physicians and researchers and aiming at measurable benefits for our patients. We will work on early diagnosis, tailored therapies and therapy decision tools with focuses on neurology, oncology and further disease entities. 
Our early uses cases will serve as blueprints for the following ones, verifying that the infrastructure developed by DIFUTURE is able to support a variety of application scenarios. Discussion: Own previous work, the use of internationally successful open source systems and a state-of-the-art software architecture are cornerstones of our approach. In the conceptual phase of the initiative, we have already prototypically implemented and tested the most important components of our architecture.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Introduction: This article is part of the Focus Theme of Methods of Information in Medicine on the German Medical Informatics Initiative. Future medicine will be predictive, preventive, personalized, participatory and digital. Data and knowledge at comprehensive depth and breadth need to be available for research and at the point of care as a basis for targeted diagnosis and therapy. Data integration and data sharing will be essential to achieve these goals. For this purpose, the consortium Data Integration for Future Medicine (DIFUTURE) will establish Data Integration Centers (DICs) at university medical centers. Objectives: The infrastructure envisioned by DIFUTURE will provide researchers with cross-site access to data and support physicians by innovative views on integrated data as well as by decision support components for personalized treatments. The aim of our use cases is to show that this accelerates innovation, improves health care processes and results in tangible benefits for our patients. To realize our vision, numerous challenges have to be addressed. The objective of this article is to describe our concepts and solutions on the technical and the organizational level with a specific focus on data integration and sharing. Governance and Policies: Data sharing implies significant security and privacy challenges. 
Therefore, state-of-the-art data protection, modern IT security concepts and patient trust play a central role in our approach. We have established governance structures and policies safeguarding data use and sharing by technical and organizational measures providing highest levels of data protection. One of our central policies is that adequate methods of data sharing for each use case and project will be selected based on rigorous risk and threat analyses. Interdisciplinary groups have been installed in order to manage change. Architectural Framework and Methodology: The DIFUTURE Data Integration Centers will implement a three-step approach to integrating, harmonizing and sharing structured, unstructured and omics data as well as images from clinical and research environments. First, data is imported and technically harmonized using common data and interface standards (including various IHE profiles, DICOM and HL7 FHIR). Second, data is preprocessed, transformed, harmonized and enriched within a staging and working environment. Third, data is imported into common analytics platforms and data models (including i2b2 and tranSMART) and made accessible in a form compliant with the interoperability requirements defined on the national level. Secure data access and sharing will be implemented with innovative combinations of privacy-enhancing technologies (safe data, safe settings, safe outputs) and methods of distributed computing. Use Cases: From the perspective of health care and medical research, our approach is disease-oriented and use-case driven, i.e. following the needs of physicians and researchers and aiming at measurable benefits for our patients. We will work on early diagnosis, tailored therapies and therapy decision tools with focuses on neurology, oncology and further disease entities. 
Our early uses cases will serve as blueprints for the following ones, verifying that the infrastructure developed by DIFUTURE is able to support a variety of application scenarios. Discussion: Own previous work, the use of internationally successful open source systems and a state-of-the-art software architecture are cornerstones of our approach. In the conceptual phase of the initiative, we have already prototypically implemented and tested the most important components of our architecture. |
Gleim, Lars Christoph; Karim, Md. Rezaul; Zimmermann, Lukas; Stenzhorn, Holger; Decker, Stefan; Beyan, Oya Schema Extraction for Privacy Preserving Processing of Sensitive Data MEPDaW-SeWeBMeDA-SWeTI 2018 Joint Proceedings of MEPDaW, SeWeBMeDA and SWeTI 2018, 2112 , CEUR Workshop Proceedings 2018. @conference{Extraction_2018, title = {Schema Extraction for Privacy Preserving Processing of Sensitive Data}, author = {Lars Christoph Gleim and Md. Rezaul Karim and Lukas Zimmermann and Holger Stenzhorn and Stefan Decker and Oya Beyan}, url = {http://ceur-ws.org/Vol-2112/sewebmeda_paper_1.pdf}, year = {2018}, date = {2018-01-01}, booktitle = {MEPDaW-SeWeBMeDA-SWeTI 2018 Joint Proceedings of MEPDaW, SeWeBMeDA and SWeTI 2018}, volume = {2112}, pages = {36-47}, series = {CEUR Workshop Proceedings}, abstract = {Lars Christoph Gleim, Md. Rezaul Karim, Lukas Zimmermann, Oliver Kohlbacher, Holger Stenzhorn, Stefan Decker, Oya Beyan Sharing privacy sensitive data across organizational boundaries is commonly not a viable option due to the legal and ethical restrictions. Regulations such as the EU General Data Protection Rules impose strict requirements concerning the protection of personal data. Therefore new approaches are emerging to utilize data right in their original repositories without giving direct access to third parties, such as the Personal Health Train initiative [16]. Circumventing limitations of previous systems, this paper proposes an automated schema extraction approach compatible with existing Semantic Web-based technologies. The extracted schema enables ad-hoc query formulation against privacy sensitive data sources without requiring data access, and successive execution of that request in a secure enclave under the data provider’s control. The developed approach permit us to extract structural information from non-uniformed resources and merge it into a single schema to preserve the privacy of each data source. 
Initial experiments show that our approach overcomes the reliance of previous approaches on agreeing upon shared schema and encoding a priori in favor of more flexible schema extraction and introspection.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Lars Christoph Gleim, Md. Rezaul Karim, Lukas Zimmermann, Oliver Kohlbacher, Holger Stenzhorn, Stefan Decker, Oya Beyan Sharing privacy sensitive data across organizational boundaries is commonly not a viable option due to the legal and ethical restrictions. Regulations such as the EU General Data Protection Rules impose strict requirements concerning the protection of personal data. Therefore new approaches are emerging to utilize data right in their original repositories without giving direct access to third parties, such as the Personal Health Train initiative [16]. Circumventing limitations of previous systems, this paper proposes an automated schema extraction approach compatible with existing Semantic Web-based technologies. The extracted schema enables ad-hoc query formulation against privacy sensitive data sources without requiring data access, and successive execution of that request in a secure enclave under the data provider’s control. The developed approach permit us to extract structural information from non-uniformed resources and merge it into a single schema to preserve the privacy of each data source. Initial experiments show that our approach overcomes the reliance of previous approaches on agreeing upon shared schema and encoding a priori in favor of more flexible schema extraction and introspection. |
Domingo-Calap, Pilar; Schubert, Benjamin; Joly, Mélanie; Solis, Morgane; Untrau, Meiggie; Carapito, Raphael; Georgel, Philippe; Caillard, Sophie; Fafi-Kremer, Samira; Paul, Nicodème; Kohlbacher, Oliver; González-Candelas, Fernando; Bahram, Seiamak An unusually high substitution rate in transplant-associated BK polyomavirus in vivo is further concentrated in HLA-C-bound viral peptides PLoS Pathogens, 14 (10), pp. e1007368, 2018. @article{PlosPathHLAC2018, title = {An unusually high substitution rate in transplant-associated BK polyomavirus in vivo is further concentrated in HLA-C-bound viral peptides}, author = {Pilar Domingo-Calap and Benjamin Schubert and Mélanie Joly and Morgane Solis and Meiggie Untrau and Raphael Carapito and Philippe Georgel and Sophie Caillard and Samira Fafi-Kremer and Nicodème Paul and Oliver Kohlbacher and Fernando González-Candelas and Seiamak Bahram}, url = {https://dx.doi.org/10.1371%2Fjournal.ppat.1007368}, year = {2018}, date = {2018-01-01}, journal = {PLoS Pathogens}, volume = {14}, number = {10}, pages = {e1007368}, abstract = {Infection with human BK polyomavirus, a small double-stranded DNA virus, potentially results in severe complications in immunocompromised patients. Here, we describe the in vivo variability and evolution of the BK polyomavirus by deep sequencing. Our data reveal the highest genomic evolutionary rate described in double-stranded DNA viruses, i.e., 10-3-10-5 substitutions per nucleotide site per year. High mutation rates in viruses allow their escape from immune surveillance and adaptation to new hosts. By combining mutational landscapes across viral genomes with in silico prediction of viral peptides, we demonstrate the presence of significantly more coding substitutions within predicted cognate HLA-C-bound viral peptides than outside. This finding suggests a role for HLA-C in antiviral immunity, perhaps through the action of killer cell immunoglobulin-like receptors. 
The present study provides a comprehensive view of viral evolution and immune escape in a DNA virus.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Infection with human BK polyomavirus, a small double-stranded DNA virus, potentially results in severe complications in immunocompromised patients. Here, we describe the in vivo variability and evolution of the BK polyomavirus by deep sequencing. Our data reveal the highest genomic evolutionary rate described in double-stranded DNA viruses, i.e., 10-3-10-5 substitutions per nucleotide site per year. High mutation rates in viruses allow their escape from immune surveillance and adaptation to new hosts. By combining mutational landscapes across viral genomes with in silico prediction of viral peptides, we demonstrate the presence of significantly more coding substitutions within predicted cognate HLA-C-bound viral peptides than outside. This finding suggests a role for HLA-C in antiviral immunity, perhaps through the action of killer cell immunoglobulin-like receptors. The present study provides a comprehensive view of viral evolution and immune escape in a DNA virus. |
Deutsch, Eric; Perez-Riverol, Yasset; Chalkley, Robert; Wilhelm, Mathias; Tate, Stephen; Sachsenberg, Timo; Walzer, Mathias; Käll, Lukas; Delanghe, Bernard; Böcker, Sebastian; Schymanski, Emma; Wilmes, Paul; Dorfer, Viktoria; Kuster, Bernhard; Volders, Pieter-Jan; Jehmlich, Nico; Vissers, Johannes; Wolan, Dennis; Wang, Ana; Mendoza, Luis; Shofstahl, Jim; Dowsey, Andrew; Griss, Johannes; Salek, Reza; Neumann, Steffen; Binz, Pierre-Alain; Lam, Henry; Vizcaíno, Juan; Bandeira, Nuno; Röst, Hannes Expanding the use of spectral libraries in proteomics Journal of Proteome Research, 2018. @article{JPRSpecLib2018, title = {Expanding the use of spectral libraries in proteomics}, author = {Eric Deutsch and Yasset Perez-Riverol and Robert Chalkley and Mathias Wilhelm and Stephen Tate and Timo Sachsenberg and Mathias Walzer and Lukas Käll and Bernard Delanghe and Sebastian Böcker and Emma Schymanski and Paul Wilmes and Viktoria Dorfer and Bernhard Kuster and Pieter-Jan Volders and Nico Jehmlich and Johannes Vissers and Dennis Wolan and Ana Wang and Luis Mendoza and Jim Shofstahl and Andrew Dowsey and Johannes Griss and Reza Salek and Steffen Neumann and Pierre-Alain Binz and Henry Lam and Juan Vizcaíno and Nuno Bandeira and Hannes Röst}, url = {https://pubs.acs.org/doi/10.1021/acs.jproteome.8b00485}, year = {2018}, date = {2018-01-01}, journal = {Journal of Proteome Research}, abstract = {The 2017 Dagstuhl Seminar on Computational Proteomics provided an opportunity for a broad discussion on the current state and future directions of the generation and use of peptide tandem mass spectrometry spectral libraries. Their use in proteomics is growing slowly, but there are multiple challenges in the field that must be addressed to further increase the adoption of spectral libraries and related techniques. The primary bottlenecks are the paucity of high quality and comprehensive libraries and the general difficulty of adopting spectral library searching into existing workflows. 
There are several existing spectral library formats, but none capture a satisfactory level of metadata; therefore a logical next improvement is to design a more advanced, Proteomics Standards Initiative-approved spectral library format that can encode all of the desired metadata. The group discussed a series of metadata requirements organized into three designations of completeness or quality, tentatively dubbed bronze, silver, and gold. The metadata can be organized at four different levels of granularity: at the collection (library) level, at the individual entry (peptide ion) level, at the peak (fragment ion) level, and at the peak annotation level. Strategies for encoding mass modifications in a consistent manner and the requirement for encoding high-quality and commonly-seen but as-yet-unidentified spectra were discussed. The group also discussed related topics, including strategies for comparing two spectra, techniques for generating representative spectra for a library, approaches for selection of optimal signature ions for targeted workflows, and issues surrounding the merging of two or more libraries into one. We present here a review of this field and the challenges that the community must address in order to accelerate the adoption of spectral libraries in routine analysis of proteomics datasets.}, keywords = {}, pubstate = {published}, tppubtype = {article} } The 2017 Dagstuhl Seminar on Computational Proteomics provided an opportunity for a broad discussion on the current state and future directions of the generation and use of peptide tandem mass spectrometry spectral libraries. Their use in proteomics is growing slowly, but there are multiple challenges in the field that must be addressed to further increase the adoption of spectral libraries and related techniques. The primary bottlenecks are the paucity of high quality and comprehensive libraries and the general difficulty of adopting spectral library searching into existing workflows. 
There are several existing spectral library formats, but none capture a satisfactory level of metadata; therefore a logical next improvement is to design a more advanced, Proteomics Standards Initiative-approved spectral library format that can encode all of the desired metadata. The group discussed a series of metadata requirements organized into three designations of completeness or quality, tentatively dubbed bronze, silver, and gold. The metadata can be organized at four different levels of granularity: at the collection (library) level, at the individual entry (peptide ion) level, at the peak (fragment ion) level, and at the peak annotation level. Strategies for encoding mass modifications in a consistent manner and the requirement for encoding high-quality and commonly-seen but as-yet-unidentified spectra were discussed. The group also discussed related topics, including strategies for comparing two spectra, techniques for generating representative spectra for a library, approaches for selection of optimal signature ions for targeted workflows, and issues surrounding the merging of two or more libraries into one. We present here a review of this field and the challenges that the community must address in order to accelerate the adoption of spectral libraries in routine analysis of proteomics datasets. |
Karim, Md. Rezaul; Nguyen, Binh-Phi; Zimmermann, Lukas; Kirsten, Toralf; Löbe, Matthias; Meineke, Frank; Stenzhorn, Holger; Kohlbacher, Oliver; Decker, Stefan; Beyan, Oya (Ed.) A Distributed Analytics Platform to Execute FHIR based Phenotyping Algorithms 2018. (BibTeX) @proceedings{PHTFHIR2018, title = {A Distributed Analytics Platform to Execute FHIR based Phenotyping Algorithms}, editor = {Md. Rezaul Karim and Binh-Phi Nguyen and Lukas Zimmermann and Toralf Kirsten and Matthias Löbe and Frank Meineke and Holger Stenzhorn and Oliver Kohlbacher and Stefan Decker and Oya Beyan}, year = {2018}, date = {2018-01-01}, series = {11th International Conference on Semantic Web Applications and Tools for Healthcare and Life Sciences (SWAT4HCLS'2018)}, keywords = {}, pubstate = {published}, tppubtype = {proceedings} } |
Wein, Samuel; Andrews, Byron; Sachsenberg, Timo; Santos-Rosa, Helena; Kohlbacher, Oliver; Kouzarides, Tony; Garcia, Benjamin A; Weisser, Hendrik A computational platform for high-throughput analysis of RNA sequences and modifications by mass spectrometry bioRxiv, 2018. @article{Wein501668, title = {A computational platform for high-throughput analysis of RNA sequences and modifications by mass spectrometry}, author = {Samuel Wein and Byron Andrews and Timo Sachsenberg and Helena Santos-Rosa and Oliver Kohlbacher and Tony Kouzarides and Benjamin A Garcia and Hendrik Weisser}, url = {https://www.biorxiv.org/content/early/2018/12/19/501668}, year = {2018}, date = {2018-01-01}, journal = {bioRxiv}, abstract = {The field of epitranscriptomics is growing in importance, with chemical modification of RNA being associated with a wide variety of biological phenomena. A pivotal challenge in this area is the identification of modified RNA residues within their sequence contexts. Next-generation sequencing approaches are generally unable to capture modifications, although workarounds for some epigenetic marks exist. Mass spectrometry (MS) offers a comprehensive solution by using analogous approaches to shotgun proteomics. However, software support for the analysis of RNA MS data is inadequate at present and does not allow high-throughput processing. In particular, existing software solutions lack the raw performance and statistical grounding to efficiently handle the large variety of modifications present on RNA. We present a free and open-source database search engine for RNA MS data, called NucleicAcidSearchEngine (NASE), that addresses these shortcomings. We demonstrate the capability of NASE to reliably identify a wide range of modified RNA sequences in three original datasets of varying complexity. 
In a human tRNA sample, we characterize over 20 different modification types simultaneously and find many cases of incomplete modification.}, keywords = {}, pubstate = {published}, tppubtype = {article} } The field of epitranscriptomics is growing in importance, with chemical modification of RNA being associated with a wide variety of biological phenomena. A pivotal challenge in this area is the identification of modified RNA residues within their sequence contexts. Next-generation sequencing approaches are generally unable to capture modifications, although workarounds for some epigenetic marks exist. Mass spectrometry (MS) offers a comprehensive solution by using analogous approaches to shotgun proteomics. However, software support for the analysis of RNA MS data is inadequate at present and does not allow high-throughput processing. In particular, existing software solutions lack the raw performance and statistical grounding to efficiently handle the large variety of modifications present on RNA. We present a free and open-source database search engine for RNA MS data, called NucleicAcidSearchEngine (NASE), that addresses these shortcomings. We demonstrate the capability of NASE to reliably identify a wide range of modified RNA sequences in three original datasets of varying complexity. In a human tRNA sample, we characterize over 20 different modification types simultaneously and find many cases of incomplete modification. |
Halfmann, Marc; Stenzhorn, Holger; Gerjets, Peter; Kohlbacher, Oliver; Oestermeier, Uwe User-Driven Development of a Novel Molecular Tumor Board Support Tool Auer, Sören; Vidal, Maria-Esther (Ed.): DILS 2018: Data Integration in the Life Sciences, pp. 195-199, Springer, 2018. @inproceedings{MTB_DILS2018, title = {User-Driven Development of a Novel Molecular Tumor Board Support Tool}, author = {Marc Halfmann and Holger Stenzhorn and Peter Gerjets and Oliver Kohlbacher and Uwe Oestermeier}, editor = {Sören Auer and Maria-Esther Vidal}, url = {https://link.springer.com/chapter/10.1007/978-3-030-06016-9_18}, year = {2018}, date = {2018-01-01}, booktitle = {DILS 2018: Data Integration in the Life Sciences}, volume = {11371}, pages = {195-199}, publisher = {Springer}, series = {Lecture Notes in Computer Science}, abstract = {Nowadays personalized medicine is of increasing importance, especially in the field of cancer therapy. More and more hospitals are conducting molecular tumor boards (MTBs) bringing together experts from various fields with different expertise to discuss patient cases taking into account genetic information from sequencing data. Yet, there is still a lack of tools to support collaborative exploration and decision making. To fill this gap, we developed a novel user interface to support MTBs. A task analysis of MTBs currently held at German hospitals showed, that there is less collaborative exploration during the meeting as expected, with a large part of the information search being done during the MTB preparation. Thus we designed our interface to support both situations, a single user preparing the MTB and the presentation of information and group discussion during the meeting.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Nowadays personalized medicine is of increasing importance, especially in the field of cancer therapy. 
More and more hospitals are conducting molecular tumor boards (MTBs) bringing together experts from various fields with different expertise to discuss patient cases taking into account genetic information from sequencing data. Yet, there is still a lack of tools to support collaborative exploration and decision making. To fill this gap, we developed a novel user interface to support MTBs. A task analysis of MTBs currently held at German hospitals showed, that there is less collaborative exploration during the meeting as expected, with a large part of the information search being done during the MTB preparation. Thus we designed our interface to support both situations, a single user preparing the MTB and the presentation of information and group discussion during the meeting. |
Friedrich, Andreas; de la Garza, Luis; Kohlbacher, Oliver Interactive Visualization for Large-Scale Multi-factorial Research Designs Springer, 11371 , 2018. @proceedings{MultiFactorial_DILS2018, title = {Interactive Visualization for Large-Scale Multi-factorial Research Designs}, author = {Andreas Friedrich and Luis de la Garza and Oliver Kohlbacher}, doi = {10.1007/978-3-030-06016-9_7}, year = {2018}, date = {2018-01-01}, volume = {11371}, publisher = {Springer}, series = {Lecture Notes in Computer Science}, keywords = {}, pubstate = {published}, tppubtype = {proceedings} } |
Friedrich, Andreas; de la Garza, Luis; Kohlbacher, Oliver; Nahnsen, Sven Interactive Visualization for Large-Scale Multi-factorial Research Designs Auer, Sören; Vidal, Maria-Esther (Ed.): DILS 2018: Data Integration in the Life Sciences, pp. 75-84, 2018. @inproceedings{MultFactorial_DILS2018, title = {Interactive Visualization for Large-Scale Multi-factorial Research Designs}, author = {Andreas Friedrich and Luis de la Garza and Oliver Kohlbacher and Sven Nahnsen}, editor = {Sören Auer and Maria-Esther Vidal}, url = {https://link.springer.com/chapter/10.1007/978-3-030-06016-9_7}, year = {2018}, date = {2018-01-01}, booktitle = {DILS 2018: Data Integration in the Life Sciences}, volume = {11371}, pages = {75-84}, series = {Lecture Notes in Computer Science}, abstract = {Recent publications have shown that the majority of studies cannot be adequately reproduced. The underlying causes seem to be diverse. Usage of the wrong statistical tools can lead to the reporting of dubious correlations as significant results. Missing information from lab protocols or other metadata can make verification impossible. Especially with the advent of Big Data in the life sciences and the hereby-involved measurement of thousands of multi-omics samples, researchers depend more than ever on adequate metadata annotation. In recent years, the scientific community has created multiple experimental design standards, which try to define the minimum information necessary to make experiments reproducible. Tools help with creation or analysis of this abundance of metadata, but are often still based on spreadsheet formats and lack intuitive visualizations. We present an interactive graph visualization tailored to experiments using a factorial experimental design. Our solution summarizes sample sources and extracted samples based on similarity of independent variables, enabling a quick grasp of the scientific question at the core of the experiment even for large studies. 
We support the ISA-Tab standard, enabling visualization of diverse omics experiments. As part of our platform for data-driven biomedical research, our implementation offers additional features to detect the status of data generation and more.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Recent publications have shown that the majority of studies cannot be adequately reproduced. The underlying causes seem to be diverse. Usage of the wrong statistical tools can lead to the reporting of dubious correlations as significant results. Missing information from lab protocols or other metadata can make verification impossible. Especially with the advent of Big Data in the life sciences and the hereby-involved measurement of thousands of multi-omics samples, researchers depend more than ever on adequate metadata annotation. In recent years, the scientific community has created multiple experimental design standards, which try to define the minimum information necessary to make experiments reproducible. Tools help with creation or analysis of this abundance of metadata, but are often still based on spreadsheet formats and lack intuitive visualizations. We present an interactive graph visualization tailored to experiments using a factorial experimental design. Our solution summarizes sample sources and extracted samples based on similarity of independent variables, enabling a quick grasp of the scientific question at the core of the experiment even for large studies. We support the ISA-Tab standard, enabling visualization of diverse omics experiments. As part of our platform for data-driven biomedical research, our implementation offers additional features to detect the status of data generation and more. |
Audain, Enrique; Uszkoreit, Julian; Sachsenberg, Timo; Pfeuffer, Julianus; Liang, Xiao; Hermjakob, Henning; Sanchez, Aniel; Eisenacher, Martin; Reinert, Knut; Tabb, David L; Kohlbacher, Oliver; Perez-Riverol, Yasset In-depth analysis of protein inference algorithms using multiple search engines and well-defined metrics J. Proteomics, 150 , pp. 170–182, 2017. @article{PIA_JProt_2016, title = {In-depth analysis of protein inference algorithms using multiple search engines and well-defined metrics}, author = {Enrique Audain and Julian Uszkoreit and Timo Sachsenberg and Julianus Pfeuffer and Xiao Liang and Henning Hermjakob and Aniel Sanchez and Martin Eisenacher and Knut Reinert and David L Tabb and Oliver Kohlbacher and Yasset Perez-Riverol}, url = {https://doi.org/10.1016/j.jprot.2016.08.002}, year = {2017}, date = {2017-01-01}, journal = {J. Proteomics}, volume = {150}, pages = {170–182}, abstract = {In mass spectrometry-based shotgun proteomics, protein identifications are usually the desired result. However, most of the analytical methods are based on the identification of reliable peptides and not the direct identification of intact proteins. Thus, assembling peptides identified from tandem mass spectra into a list of proteins, referred to as protein inference, is a critical step in proteomics research. Currently, different protein inference algorithms and tools are available for the proteomics community. Here, we evaluated five software tools for protein inference (PIA, ProteinProphet, Fido, ProteinLP, MSBayesPro) using three popular database search engines: Mascot, X!Tandem, and MS-GF+. All the algorithms were evaluated using a highly customizable KNIME workflow using four different public datasets with varying complexities (different sample preparation, species and analytical instruments). We defined a set of quality control metrics to evaluate the performance of each combination of search engines, protein inference algorithm, and parameters on each dataset. 
We show that the results for complex samples vary not only regarding the actual numbers of reported protein groups but also concerning the actual composition of groups. Furthermore, the robustness of reported proteins when using databases of differing complexities is strongly dependant on the applied inference algorithm. Finally, merging the identifications of multiple search engines does not necessarily increase the number of reported proteins, but does increase the number of peptides per protein and thus can generally be recommended. SIGNIFICANCE: Protein inference is one of the major challenges in MS-based proteomics nowadays. Currently, there are a vast number of protein inference algorithms and implementations available for the proteomics community. Protein assembly impacts in the final results of the research, the quantitation values and the final claims in the research manuscript. Even though protein inference is a crucial step in proteomics data analysis, a comprehensive evaluation of the many different inference methods has never been performed. Previously Journal of proteomics has published multiple studies about other benchmark of bioinformatics algorithms (PMID: 26585461; PMID: 22728601) in proteomics studies making clear the importance of those studies for the proteomics community and the journal audience. This manuscript presents a new bioinformatics solution based on the KNIME/OpenMS platform that aims at providing a fair comparison of protein inference algorithms (https://github.com/KNIME-OMICS). Six different algorithms - ProteinProphet, MSBayesPro, ProteinLP, Fido and PIA- were evaluated using the highly customizable workflow on four public datasets with varying complexities. Five popular database search engines Mascot, X!Tandem, MS-GF+ and combinations thereof were evaluated for every protein inference tool. 
In total >186 protein lists were analyzed and carefully compared using three metrics for quality assessments of the protein inference results: 1) the numbers of reported proteins, 2) peptides per protein, and the 3) number of uniquely reported proteins per inference method, to address the quality of each inference method. We also examined how many proteins were reported by choosing each combination of search engines, protein inference algorithms and parameters on each dataset. The results show that using 1) PIA or Fido seems to be a good choice when studying the results of the analyzed workflow, regarding not only the reported proteins and the high-quality identifications, but also the required runtime. 2) Merging the identifications of multiple search engines gives almost always more confident results and increases the number of peptides per protein group. 3) The usage of databases containing not only the canonical, but also known isoforms of proteins has a small impact on the number of reported proteins. The detection of specific isoforms could, concerning the question behind the study, compensate for slightly shorter reports using the parsimonious reports. 4) The current workflow can be easily extended to support new algorithms and search engine combinations.}, keywords = {}, pubstate = {published}, tppubtype = {article} } In mass spectrometry-based shotgun proteomics, protein identifications are usually the desired result. However, most of the analytical methods are based on the identification of reliable peptides and not the direct identification of intact proteins. Thus, assembling peptides identified from tandem mass spectra into a list of proteins, referred to as protein inference, is a critical step in proteomics research. Currently, different protein inference algorithms and tools are available for the proteomics community. 
Here, we evaluated five software tools for protein inference (PIA, ProteinProphet, Fido, ProteinLP, MSBayesPro) using three popular database search engines: Mascot, X!Tandem, and MS-GF+. All the algorithms were evaluated using a highly customizable KNIME workflow using four different public datasets with varying complexities (different sample preparation, species and analytical instruments). We defined a set of quality control metrics to evaluate the performance of each combination of search engines, protein inference algorithm, and parameters on each dataset. We show that the results for complex samples vary not only regarding the actual numbers of reported protein groups but also concerning the actual composition of groups. Furthermore, the robustness of reported proteins when using databases of differing complexities is strongly dependant on the applied inference algorithm. Finally, merging the identifications of multiple search engines does not necessarily increase the number of reported proteins, but does increase the number of peptides per protein and thus can generally be recommended. SIGNIFICANCE: Protein inference is one of the major challenges in MS-based proteomics nowadays. Currently, there are a vast number of protein inference algorithms and implementations available for the proteomics community. Protein assembly impacts in the final results of the research, the quantitation values and the final claims in the research manuscript. Even though protein inference is a crucial step in proteomics data analysis, a comprehensive evaluation of the many different inference methods has never been performed. Previously Journal of proteomics has published multiple studies about other benchmark of bioinformatics algorithms (PMID: 26585461; PMID: 22728601) in proteomics studies making clear the importance of those studies for the proteomics community and the journal audience. 
This manuscript presents a new bioinformatics solution based on the KNIME/OpenMS platform that aims at providing a fair comparison of protein inference algorithms (https://github.com/KNIME-OMICS). Six different algorithms - ProteinProphet, MSBayesPro, ProteinLP, Fido and PIA- were evaluated using the highly customizable workflow on four public datasets with varying complexities. Five popular database search engines Mascot, X!Tandem, MS-GF+ and combinations thereof were evaluated for every protein inference tool. In total >186 protein lists were analyzed and carefully compared using three metrics for quality assessments of the protein inference results: 1) the numbers of reported proteins, 2) peptides per protein, and the 3) number of uniquely reported proteins per inference method, to address the quality of each inference method. We also examined how many proteins were reported by choosing each combination of search engines, protein inference algorithms and parameters on each dataset. The results show that using 1) PIA or Fido seems to be a good choice when studying the results of the analyzed workflow, regarding not only the reported proteins and the high-quality identifications, but also the required runtime. 2) Merging the identifications of multiple search engines gives almost always more confident results and increases the number of peptides per protein group. 3) The usage of databases containing not only the canonical, but also known isoforms of proteins has a small impact on the number of reported proteins. The detection of specific isoforms could, concerning the question behind the study, compensate for slightly shorter reports using the parsimonious reports. 4) The current workflow can be easily extended to support new algorithms and search engine combinations. |
Backert, Linus; Kowalewski, Daniel; Walz, Simon; Schuster, Heiko; Berlin, Claudia; Neidert, Marian; Schemionek, Mirle; Brümmendorf, Tim Hendrik; Vicinic, Vladan; Niederwieser, Dietger; Kanz, Lothar; Salih, Helmut Rainer; Kohlbacher, Oliver; Weisel, Katja; Rammensee, Hans-Georg; Stevanovic, Stefan; Walz, Juliana Sarah A meta-analysis of HLA peptidome composition in different hematological entities: Entity-specific dividing lines and "pan-leukemia" antigens Oncotarget, 8 (27), pp. 43915-43924, 2017. @article{panLeukemiaAntigens_Oncotargets2016, title = {A meta-analysis of HLA peptidome composition in different hematological entities: Entity-specific dividing lines and "pan-leukemia" antigens}, author = {Linus Backert and Daniel Kowalewski and Simon Walz and Heiko Schuster and Claudia Berlin and Marian Neidert and Mirle Schemionek and Tim Hendrik Brümmendorf and Vladan Vicinic and Dietger Niederwieser and Lothar Kanz and Helmut Rainer Salih and Oliver Kohlbacher and Katja Weisel and Hans-Georg Rammensee and Stefan Stevanovic and Juliana Sarah Walz}, url = {https://doi.org/10.18632/oncotarget.14918}, year = {2017}, date = {2017-01-01}, journal = {Oncotarget}, volume = {8}, number = {27}, pages = {43915-43924}, abstract = {Hematological malignancies (HM) are highly amenable targets for immunotherapeutic intervention and may be effectively treated by antigen-specific T-cell based treatment. Recent studies demonstrate that physiologically occurring anti-cancer T-cell responses in certain HM entities target broadly presented non-mutated epitopes. HLA ligands are thus implied as prime targets for broadly applicable and antigen-specific off-the-shelf compounds. 
With the aim of assessing the presence of common targets shared among different HM which may enable addressing a larger patient collective we conducted a meta-analysis of 83 mass spectrometry-based HLA peptidome datasets (comprising 40,361 unique peptide identifications) across four major HM (19 AML, 16 CML, 35 CLL, and 13 MM/MCL samples) and investigated similarities and differences within the HLA presented antigenic landscape. We found the cancer HLA peptidome datasets to cluster specifically along entity and lineage lines, suggesting that the immunopeptidome directly reflects the differences in the underlying (tumor-)biology. In line with these findings, we only detected a small set of entity-spanning antigens, which were predominantly characterized by low presentation frequencies within the different patient cohorts. These findings suggest that design of T-cell immunotherapies for the treatment of HM should ideally be conducted in an entity-specific fashion.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Hematological malignancies (HM) are highly amenable targets for immunotherapeutic intervention and may be effectively treated by antigen-specific T-cell based treatment. Recent studies demonstrate that physiologically occurring anti-cancer T-cell responses in certain HM entities target broadly presented non-mutated epitopes. HLA ligands are thus implied as prime targets for broadly applicable and antigen-specific off-the-shelf compounds. With the aim of assessing the presence of common targets shared among different HM which may enable addressing a larger patient collective we conducted a meta-analysis of 83 mass spectrometry-based HLA peptidome datasets (comprising 40,361 unique peptide identifications) across four major HM (19 AML, 16 CML, 35 CLL, and 13 MM/MCL samples) and investigated similarities and differences within the HLA presented antigenic landscape. 
We found the cancer HLA peptidome datasets to cluster specifically along entity and lineage lines, suggesting that the immunopeptidome directly reflects the differences in the underlying (tumor-)biology. In line with these findings, we only detected a small set of entity-spanning antigens, which were predominantly characterized by low presentation frequencies within the different patient cohorts. These findings suggest that design of T-cell immunotherapies for the treatment of HM should ideally be conducted in an entity-specific fashion. |
Haen, Sebastian P; Groh, Christiane; Schumm, Michael; Backert, Linus; Löffler, Markus W; Federmann, Birgit; Faul, Christoph; Dörfel, Daniela; Vogel, Wichard; Handgretinger, Rupert; Kanz, Lothar; Bethge, Wolfgang A Haploidentical hematopoietic cell transplantation using in vitro T cell depleted grafts as salvage therapy in patients with disease relapse after prior allogeneic transplantation Annals of Hematology, pp. 1-11, 2017. @article{Haen2017, title = {Haploidentical hematopoietic cell transplantation using in vitro T cell depleted grafts as salvage therapy in patients with disease relapse after prior allogeneic transplantation}, author = {Sebastian P Haen and Christiane Groh and Michael Schumm and Linus Backert and Markus W Löffler and Birgit Federmann and Christoph Faul and Daniela Dörfel and Wichard Vogel and Rupert Handgretinger and Lothar Kanz and Wolfgang A Bethge}, url = {http://dx.doi.org/10.1007/s00277-017-2941-x}, year = {2017}, date = {2017-01-01}, journal = {Annals of Hematology}, pages = {1-11}, abstract = {Disease relapse after one or more allogeneic hematopoietic cell transplantations (HCT) represents a therapeutic challenge with all options bearing a significant morbidity and mortality. Haploidentical HCT may induce more pronounced anti-leukemic effects and was evaluated at our center in 25 consecutive patients with disease relapse after preceding HCT receiving haploidentical grafts after in vitro T cell depletion. Overall survival at 1 and 2 years was 32 and 14%, respectively. Of note, patients with complete remission (CR) before haploidentical HCT had a very favorable overall survival of 41.7% at 2 years. Cumulative incidence of non-relapse mortality was 36 and 40% at 1 and 2 years, respectively. With a cumulative incidence for relapse of 36 and 45.6% at 1 and 2 years, disease-free survival (DFS) was 28 and 14.4%, respectively. Here also, patients with CR before haploidentical HCT had a favorable DFS of 42% at 2 years. 
Only very limited acute (11 patients (44%) with a median grade 1) and chronic graft versus host disease (GvHD) (5 patients (11%), limited grade only) was observed. The main complications and causes of death comprised - besides relapse - infections and bleeding complications. Hence, haploidentical HCT can achieve long-term survival comparable to second transplantation with matched or mismatched donors for patients with otherwise deleterious prognosis and should be considered as a treatment option for patients experiencing disease relapse after previous allogeneic HCT.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Disease relapse after one or more allogeneic hematopoietic cell transplantations (HCT) represents a therapeutic challenge with all options bearing a significant morbidity and mortality. Haploidentical HCT may induce more pronounced anti-leukemic effects and was evaluated at our center in 25 consecutive patients with disease relapse after preceding HCT receiving haploidentical grafts after in vitro T cell depletion. Overall survival at 1 and 2 years was 32 and 14%, respectively. Of note, patients with complete remission (CR) before haploidentical HCT had a very favorable overall survival of 41.7% at 2 years. Cumulative incidence of non-relapse mortality was 36 and 40% at 1 and 2 years, respectively. With a cumulative incidence for relapse of 36 and 45.6% at 1 and 2 years, disease-free survival (DFS) was 28 and 14.4%, respectively. Here also, patients with CR before haploidentical HCT had a favorable DFS of 42% at 2 years. Only very limited acute (11 patients (44%) with a median grade 1) and chronic graft versus host disease (GvHD) (5 patients (11%), limited grade only) was observed. The main complications and causes of death comprised - besides relapse - infections and bleeding complications. 
Hence, haploidentical HCT can achieve long-term survival comparable to second transplantation with matched or mismatched donors for patients with otherwise deleterious prognosis and should be considered as a treatment option for patients experiencing disease relapse after previous allogeneic HCT. |
Nelde, Annika; Kowalewski, Daniel J; Backert, Linus; Schuster, Heiko; Werner, Jan-Ole; Klein, Reinhild; Kohlbacher, Oliver; Kanz, Lothar; Salih, Helmut R; Rammensee, Hans-Georg; Stevanović, Stefan; Stickel, Juliane S HLA ligandome analysis of primary chronic lymphocytic leukemia (CLL) cells under lenalidomide treatment confirms the suitability of lenalidomide for combination with T-cell based immunotherapy OncoImmunol., 5 (12), pp. e1249560, 2017. @article{Walz_OncoImmuno_2017, title = {HLA ligandome analysis of primary chronic lymphocytic leukemia (CLL) cells under lenalidomide treatment confirms the suitability of lenalidomide for combination with T-cell based immunotherapy}, author = {Annika Nelde and Daniel J Kowalewski and Linus Backert and Heiko Schuster and Jan-Ole Werner and Reinhild Klein and Oliver Kohlbacher and Lothar Kanz and Helmut R Salih and Hans-Georg Rammensee and Stefan Stevanović and Juliane S Stickel}, url = {https://doi.org/10.1080/2162402X.2017.1316438}, year = {2017}, date = {2017-01-01}, journal = {OncoImmunol.}, volume = {5}, number = {12}, pages = {e1249560}, abstract = {We recently completed a phase I/IIa trial of RNActive® CV9201, a novel mRNA-based therapeutic vaccine targeting five tumor-associated antigens in non-small cell lung cancer (NSCLC) patients. The aim of the study presented here was to comprehensively analyze changes in peripheral blood during the vaccination period and to generate hypotheses facilitating the identification of potential biomarkers correlating with differential clinical outcomes post RNActive® immunotherapy. We performed whole-genome expression profiling in a subgroup of 22 stage IV NSCLC patients before and after initiation of treatment with CV9201. Utilizing an analytic approach based on blood transcriptional modules (BTMs), a previously described, sensitive tool for blood transcriptome data analysis, patients segregated into two major clusters based on transcriptional changes post RNActive® treatment. 
The first group of patients was characterized by the upregulation of an expression signature associated with myeloid cells and inflammation, whereas the other group exhibited an expression signature associated with T and NK cells. Patients with an enrichment of T and NK cell modules after treatment compared to baseline exhibited significantly longer progression-free and overall survival compared to patients with an upregulation of myeloid cell and inflammatory modules. Notably, these gene expression signatures were mutually exclusive and inversely correlated. Furthermore, our findings correlated with phenotypic data derived by flow cytometry as well as the neutrophil-to-lymphocyte ratio. Our study thus demonstrates non-overlapping, distinct transcriptional profiles correlating with survival warranting further validation for the development of biomarker candidates for mRNA-based immunotherapy.}, keywords = {}, pubstate = {published}, tppubtype = {article} } We recently completed a phase I/IIa trial of RNActive® CV9201, a novel mRNA-based therapeutic vaccine targeting five tumor-associated antigens in non-small cell lung cancer (NSCLC) patients. The aim of the study presented here was to comprehensively analyze changes in peripheral blood during the vaccination period and to generate hypotheses facilitating the identification of potential biomarkers correlating with differential clinical outcomes post RNActive® immunotherapy. We performed whole-genome expression profiling in a subgroup of 22 stage IV NSCLC patients before and after initiation of treatment with CV9201. Utilizing an analytic approach based on blood transcriptional modules (BTMs), a previously described, sensitive tool for blood transcriptome data analysis, patients segregated into two major clusters based on transcriptional changes post RNActive® treatment. 
The first group of patients was characterized by the upregulation of an expression signature associated with myeloid cells and inflammation, whereas the other group exhibited an expression signature associated with T and NK cells. Patients with an enrichment of T and NK cell modules after treatment compared to baseline exhibited significantly longer progression-free and overall survival compared to patients with an upregulation of myeloid cell and inflammatory modules. Notably, these gene expression signatures were mutually exclusive and inversely correlated. Furthermore, our findings correlated with phenotypic data derived by flow cytometry as well as the neutrophil-to-lymphocyte ratio. Our study thus demonstrates non-overlapping, distinct transcriptional profiles correlating with survival warranting further validation for the development of biomarker candidates for mRNA-based immunotherapy. |
Heimgärtner, Florian; Hettich, Stefan; Kohlbacher, Oliver; Menth, Michael Scaling Home Automation to Public Buildings: A Distributed Multiuser Setup for OpenHAB 2 Global Internet of Things Summit (GIoTS) 2017, 2017. @conference{GIoTS2017, title = {Scaling Home Automation to Public Buildings: A Distributed Multiuser Setup for OpenHAB 2}, author = {Florian Heimgärtner and Stefan Hettich and Oliver Kohlbacher and Michael Menth}, url = {http://dx.doi.org/10.1109/GIOTS.2017.8016235}, year = {2017}, date = {2017-01-01}, booktitle = {Global Internet of Things Summit (GIoTS) 2017}, abstract = {Home automation systems can help to reduce energy costs and increase comfort of living by adjusting room temperatures according to schedules, rules, and sensor input. OpenHAB 2 is an open-source home automation framework supporting various home automation technologies and devices. While OpenHAB is well suited for single occupancy homes, large public buildings pose additional challenges. The limited range of wireless home automation technologies requires transceivers distributed across the building. Additionally, control permissions need to be restricted to authorized persons. This work presents OpenHAB-DM, a distributed OpenHAB 2 setup with extensions introducing user authentication, access control, and management tools for decentralized OpenHAB node deployment.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Home automation systems can help to reduce energy costs and increase comfort of living by adjusting room temperatures according to schedules, rules, and sensor input. OpenHAB 2 is an open-source home automation framework supporting various home automation technologies and devices. While OpenHAB is well suited for single occupancy homes, large public buildings pose additional challenges. The limited range of wireless home automation technologies requires transceivers distributed across the building. 
Additionally, control permissions need to be restricted to authorized persons. This work presents OpenHAB-DM, a distributed OpenHAB 2 setup with extensions introducing user authentication, access control, and management tools for decentralized OpenHAB node deployment. |
Schubert, Benjamin; de la Garza, Luis; Mohr, Christopher; Walzer, Mathias; Kohlbacher, Oliver ImmunoNodes - Graphical Development of Complex Immunoinformatics Workflows BMC Bioinformatics, 18 (1), pp. 242, 2017. @article{ImmunoNodes2017, title = {ImmunoNodes - Graphical Development of Complex Immunoinformatics Workflows}, author = {Benjamin Schubert and Luis de la Garza and Christopher Mohr and Mathias Walzer and Oliver Kohlbacher}, url = {https://doi.org/10.1186/s12859-017-1667-z}, year = {2017}, date = {2017-01-01}, journal = {BMC Bioinformatics}, volume = {18}, number = {1}, pages = {242}, abstract = {BACKGROUND: Immunoinformatics has become a crucial part in biomedical research. Yet many immunoinformatics tools have command line interfaces only and can be difficult to install. Web-based immunoinformatics tools, on the other hand, are difficult to integrate with other tools, which is typically required for the complex analysis and prediction pipelines required for advanced applications. RESULT: We present ImmunoNodes, an immunoinformatics toolbox that is fully integrated into the visual workflow environment KNIME. By dragging and dropping tools and connecting them to indicate the data flow through the pipeline, it is possible to construct very complex workflows without the need for coding. CONCLUSION: ImmunoNodes allows users to build complex workflows with an easy to use and intuitive interface with a few clicks on any desktop computer.}, keywords = {}, pubstate = {published}, tppubtype = {article} } BACKGROUND: Immunoinformatics has become a crucial part in biomedical research. Yet many immunoinformatics tools have command line interfaces only and can be difficult to install. Web-based immunoinformatics tools, on the other hand, are difficult to integrate with other tools, which is typically required for the complex analysis and prediction pipelines required for advanced applications. 
RESULT: We present ImmunoNodes, an immunoinformatics toolbox that is fully integrated into the visual workflow environment KNIME. By dragging and dropping tools and connecting them to indicate the data flow through the pipeline, it is possible to construct very complex workflows without the need for coding. CONCLUSION: ImmunoNodes allows users to build complex workflows with an easy to use and intuitive interface with a few clicks on any desktop computer. |
Vizcaino, Juan Antonio; Mayer, Gerhard; Perkins, Simon R; Barsnes, Harald; Vaudel, Marc; Perez-Riverol, Yasset; Ternent, Tobias; Uszkoreit, Julian; Eisenacher, Martin; Fischer, Lutz; Rappsilber, Juri; Netz, Eugen; Walzer, Mathias; Kohlbacher, Oliver; Leitner, Alexander; Chalkley, Robert J; Ghali, Fawaz; Martínez-Bartolomé, Salvador; Deutsch, Eric W; Jones, Andrew R The mzIdentML data standard version 1.2, supporting advances in proteome informatics Mol. Cell. Prot., 16 (7), pp. 1275-1285, 2017. @article{mzIdML1.2_2017, title = {The mzIdentML data standard version 1.2, supporting advances in proteome informatics}, author = {Juan Antonio Vizcaino and Gerhard Mayer and Simon R Perkins and Harald Barsnes and Marc Vaudel and Yasset Perez-Riverol and Tobias Ternent and Julian Uszkoreit and Martin Eisenacher and Lutz Fischer and Juri Rappsilber and Eugen Netz and Mathias Walzer and Oliver Kohlbacher and Alexander Leitner and Robert J Chalkley and Fawaz Ghali and Salvador Martínez-Bartolomé and Eric W Deutsch and Andrew R Jones}, url = {https://doi.org/10.1074/mcp.M117.068429}, year = {2017}, date = {2017-01-01}, journal = {Mol. Cell. Prot.}, volume = {16}, number = {7}, pages = {1275-1285}, abstract = {The first stable version of the Proteomics Standards Initiative mzIdentML open data standard (version 1.1) was published in 2012 - capturing the outputs of peptide and protein identification software. In the intervening years, the standard has become well supported in both commercial and open software, as well as a submission and download format for public repositories. Here we report a new release of mzIdentML (version 1.2) that is required to keep pace with emerging practice in proteome informatics. New features have been added to support: (i) scores associated with localization of modifications on peptides; (ii) statistics performed at the level of peptides; (iii) identification of cross-linked peptides; and (iv) support for proteogenomics approaches. 
In addition, there is now improved support for the encoding of de novo sequencing of peptides, spectral library searches and protein inference. As a key point, the underlying XML schema has only undergone very minor modifications to simplify as much as possible the transition from version 1.1 to version 1.2 for implementers, but there have been several notable updates to the format specification, implementation guidelines, controlled vocabularies and validation software. mzIdentML 1.2 can be described as backwards compatible, in that reading software designed for mzIdentML 1.1 should function in most cases without adaptation. We anticipate that these developments will provide a continued stable base for software teams working to implement the standard. All the related documentation is accessible at http://www.psidev.info/mzidentml.}, keywords = {}, pubstate = {published}, tppubtype = {article} } The first stable version of the Proteomics Standards Initiative mzIdentML open data standard (version 1.1) was published in 2012 - capturing the outputs of peptide and protein identification software. In the intervening years, the standard has become well supported in both commercial and open software, as well as a submission and download format for public repositories. Here we report a new release of mzIdentML (version 1.2) that is required to keep pace with emerging practice in proteome informatics. New features have been added to support: (i) scores associated with localization of modifications on peptides; (ii) statistics performed at the level of peptides; (iii) identification of cross-linked peptides; and (iv) support for proteogenomics approaches. In addition, there is now improved support for the encoding of de novo sequencing of peptides, spectral library searches and protein inference. 
As a key point, the underlying XML schema has only undergone very minor modifications to simplify as much as possible the transition from version 1.1 to version 1.2 for implementers, but there have been several notable updates to the format specification, implementation guidelines, controlled vocabularies and validation software. mzIdentML 1.2 can be described as backwards compatible, in that reading software designed for mzIdentML 1.1 should function in most cases without adaptation. We anticipate that these developments will provide a continued stable base for software teams working to implement the standard. All the related documentation is accessible at http://www.psidev.info/mzidentml. |
Pfeuffer, Julianus U; Sachsenberg, Timo; Alka, Oliver; Walzer, Mathias; Fillbrunn, Alexander; Nilse, Lars; Schilling, Oliver; Reinert, Knut; Kohlbacher, Oliver OpenMS - A platform for reproducible analysis of mass spectrometry data J. Biotechnol., pp. S0168-1656(17)30251-1, 2017. @article{OpenMSJBiotech2017, title = {OpenMS - A platform for reproducible analysis of mass spectrometry data}, author = {Julianus U Pfeuffer and Timo Sachsenberg and Oliver Alka and Mathias Walzer and Alexander Fillbrunn and Lars Nilse and Oliver Schilling and Knut Reinert and Oliver Kohlbacher}, url = {https://doi.org/10.1016/j.jbiotec.2017.05.016}, year = {2017}, date = {2017-01-01}, journal = {J. Biotechnol.}, pages = {S0168-1656(17)30251-1}, abstract = {In recent years, several mass spectrometry-based omics technologies emerged to investigate qualitative and quantitative changes within thousands of biologically active components such as proteins, lipids and metabolites. The research enabled through these methods potentially contributes to the diagnosis and pathophysiology of human diseases as well as to the clarification of structures and interactions between biomolecules. Simultaneously, technological advances in the field of mass spectrometry leading to an ever increasing amount of data, demand high standards in efficiency, accuracy and reproducibility of potential analysis software. RESULTS: This article presents the current state and ongoing developments in OpenMS, a versatile open-source framework aimed at enabling reproducible analyses of high-throughput mass spectrometry data. It provides implementations of frequently occurring processing operations on MS data through a clean application programming interface in C++ and Python. A collection of 185 tools and ready-made workflows for typical MS-based experiments enable convenient analyses for non-developers and facilitate reproducible research without losing flexibility. 
CONCLUSIONS: OpenMS will continue to increase its ease of use for developers as well as users with improved continuous integration/deployment strategies, regular trainings with updated training materials and multiple sources of support. The active developer community ensures the incorporation of new features to support state of the art research.}, keywords = {}, pubstate = {published}, tppubtype = {article} } In recent years, several mass spectrometry-based omics technologies emerged to investigate qualitative and quantitative changes within thousands of biologically active components such as proteins, lipids and metabolites. The research enabled through these methods potentially contributes to the diagnosis and pathophysiology of human diseases as well as to the clarification of structures and interactions between biomolecules. Simultaneously, technological advances in the field of mass spectrometry leading to an ever increasing amount of data, demand high standards in efficiency, accuracy and reproducibility of potential analysis software. RESULTS: This article presents the current state and ongoing developments in OpenMS, a versatile open-source framework aimed at enabling reproducible analyses of high-throughput mass spectrometry data. It provides implementations of frequently occurring processing operations on MS data through a clean application programming interface in C++ and Python. A collection of 185 tools and ready-made workflows for typical MS-based experiments enable convenient analyses for non-developers and facilitate reproducible research without losing flexibility. CONCLUSIONS: OpenMS will continue to increase its ease of use for developers as well as users with improved continuous integration/deployment strategies, regular trainings with updated training materials and multiple sources of support. The active developer community ensures the incorporation of new features to support state of the art research. |
Chevrette, Marc G; Aicheler, Fabian; Kohlbacher, Oliver; Currie, Cameron R; Medema, Marnix H SANDPUMA: Ensemble Predictions of Nonribosomal Peptide Chemistry Reveals Biosynthetic Diversity across Actinobacteria Bioinformatics, 33 (20), pp. 3202-3210, 2017. @article{SANDPUMA2017, title = {SANDPUMA: Ensemble Predictions of Nonribosomal Peptide Chemistry Reveals Biosynthetic Diversity across Actinobacteria}, author = {Marc G Chevrette and Fabian Aicheler and Oliver Kohlbacher and Cameron R Currie and Marnix H Medema}, url = {https://doi.org/10.1093/bioinformatics/btx400}, year = {2017}, date = {2017-01-01}, journal = {Bioinformatics}, volume = {33}, number = {20}, pages = {3202-3210}, abstract = {Summary: Nonribosomally synthesized peptides (NRPs) are natural products with widespread applications in medicine and biotechnology. Many algorithms have been developed to predict the substrate specificities of nonribosomal peptide synthetase adenylation (A) domains from DNA sequences, which enables prioritization and dereplication, and integration with other data types in discovery efforts. However, insufficient training data and a lack of clarity regarding prediction quality have impeded optimal use. Here, we introduce prediCAT, a new phylogenetics-inspired algorithm, which quantitatively estimates the degree of predictability of each A-domain. We then systematically benchmarked all algorithms on a newly-gathered, independent test set of 434 A-domain sequences, showing that active-site-motif-based algorithms outperform whole-domain-based methods. Subsequently, we developed SANDPUMA, a powerful ensemble algorithm, based on newly-trained versions of all high-performing algorithms, which significantly outperforms individual methods. Finally, we deployed SANDPUMA in a systematic investigation of 7,635 Actinobacteria genomes, suggesting that NRP chemical diversity is much higher than previously estimated. 
SANDPUMA has been integrated into the widely-used antiSMASH biosynthetic gene cluster analysis pipeline and is also available as an open-source, standalone tool. Availability: SANDPUMA is freely available at https://bitbucket.org/chevrm/sandpuma and as a docker image at https://hub.docker.com/r/chevrm/sandpuma/ under the GNU Public License 3 (GPL3). Contact: , }, keywords = {}, pubstate = {published}, tppubtype = {article} } Summary: Nonribosomally synthesized peptides (NRPs) are natural products with widespread applications in medicine and biotechnology. Many algorithms have been developed to predict the substrate specificities of nonribosomal peptide synthetase adenylation (A) domains from DNA sequences, which enables prioritization and dereplication, and integration with other data types in discovery efforts. However, insufficient training data and a lack of clarity regarding prediction quality have impeded optimal use. Here, we introduce prediCAT, a new phylogenetics-inspired algorithm, which quantitatively estimates the degree of predictability of each A-domain. We then systematically benchmarked all algorithms on a newly-gathered, independent test set of 434 A-domain sequences, showing that active-site-motif-based algorithms outperform whole-domain-based methods. Subsequently, we developed SANDPUMA, a powerful ensemble algorithm, based on newly-trained versions of all high-performing algorithms, which significantly outperforms individual methods. Finally, we deployed SANDPUMA in a systematic investigation of 7,635 Actinobacteria genomes, suggesting that NRP chemical diversity is much higher than previously estimated. SANDPUMA has been integrated into the widely-used antiSMASH biosynthetic gene cluster analysis pipeline and is also available as an open-source, standalone tool. 
Availability: SANDPUMA is freely available at https://bitbucket.org/chevrm/sandpuma and as a docker image at https://hub.docker.com/r/chevrm/sandpuma/ under the GNU Public License 3 (GPL3). |
Vizcaíno, Juan Antonio; Walzer, Mathias; Jiménez, Rafael C; Bittremieux, Wout; Bouyssie, David; Carapito, Christine; Corrales, Fernando; Ferro, Myriam; Heck, Albert J R; Horvatovich, Peter; Hubalek, Martin; Lane, Lydia; Laukens, Kris; Levander, Fredrik; Lisacek, Frederique; Novak, Petr; Palmblad, Magnus; Piovesan, Damiano; Pühler, Alfred; Schwämmle, Veit; Valkenborg, Dirk; van Rijswijk, Merlijn; Vondrasek, Jiri; Eisenacher, Martin; Martens, Lennart; Kohlbacher, Oliver A community proposal to integrate proteomics activities in ELIXIR F1000Research, 6 , pp. 875, 2017. @article{ProteomicsELIXIR2017, title = {A community proposal to integrate proteomics activities in ELIXIR}, author = {Juan Antonio Vizcaíno and Mathias Walzer and Rafael C Jiménez and Wout Bittremieux and David Bouyssie and Christine Carapito and Fernando Corrales and Myriam Ferro and Albert J R Heck and Peter Horvatovich and Martin Hubalek and Lydia Lane and Kris Laukens and Fredrik Levander and Frederique Lisacek and Petr Novak and Magnus Palmblad and Damiano Piovesan and Alfred Pühler and Veit Schwämmle and Dirk Valkenborg and Merlijn van Rijswijk and Jiri Vondrasek and Martin Eisenacher and Lennart Martens and Oliver Kohlbacher}, url = {https://doi.org/10.12688/f1000research.11751.1}, year = {2017}, date = {2017-01-01}, journal = {F1000Research}, volume = {6}, pages = {875}, abstract = {Computational approaches have been major drivers behind the progress of proteomics in recent years. The aim of this white paper is to provide a framework for integrating computational proteomics into ELIXIR in the near future, and thus to broaden the portfolio of omics technologies supported by this European distributed infrastructure. This white paper is the direct result of a strategy meeting on ‘The Future of Proteomics in ELIXIR’ that took place in March 2017 in Tübingen (Germany), and involved representatives of eleven ELIXIR nodes. 
These discussions led to a list of priority areas in computational proteomics that would complement existing activities and close gaps in the portfolio of tools and services offered by ELIXIR so far. We provide some suggestions on how these activities could be integrated into ELIXIR’s existing platforms, and how it could lead to a new ELIXIR use case in proteomics. We also highlight connections to the related field of metabolomics, where similar activities are ongoing. This white paper could thus serve as a starting point for the integration of computational proteomics into ELIXIR. Over the next few months we will be working closely with all stakeholders involved, and in particular with other representatives of the proteomics community, to further refine this paper.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Computational approaches have been major drivers behind the progress of proteomics in recent years. The aim of this white paper is to provide a framework for integrating computational proteomics into ELIXIR in the near future, and thus to broaden the portfolio of omics technologies supported by this European distributed infrastructure. This white paper is the direct result of a strategy meeting on ‘The Future of Proteomics in ELIXIR’ that took place in March 2017 in Tübingen (Germany), and involved representatives of eleven ELIXIR nodes. These discussions led to a list of priority areas in computational proteomics that would complement existing activities and close gaps in the portfolio of tools and services offered by ELIXIR so far. We provide some suggestions on how these activities could be integrated into ELIXIR’s existing platforms, and how it could lead to a new ELIXIR use case in proteomics. We also highlight connections to the related field of metabolomics, where similar activities are ongoing. This white paper could thus serve as a starting point for the integration of computational proteomics into ELIXIR. 
Over the next few months we will be working closely with all stakeholders involved, and in particular with other representatives of the proteomics community, to further refine this paper. |
Alberer, Martin; Gnad-Vogt, Ulrike; Hong, Henoch Sangjoon; Mehr, Keyvan Tadjalli; Backert, Linus; Finak, Greg; Gottardo, Raphael; Bica, Mihai Alexandru; Garofano, Aurelio; Koch, Sven Dominik; Fotin-Mleczek, Mariola; Hoerr, Ingmar; Clemens, Ralf; von Sonnenburg, Frank Safety and immunogenicity of a mRNA rabies vaccine in healthy adults: an open-label, non-randomised, prospective, first-in-human phase 1 clinical trial The Lancet, 390 (10101), pp. 1511-1520, 2017. @article{Alberer2017, title = {Safety and immunogenicity of a mRNA rabies vaccine in healthy adults: an open-label, non-randomised, prospective, first-in-human phase 1 clinical trial}, author = {Martin Alberer and Ulrike Gnad-Vogt and Henoch Sangjoon Hong and Keyvan Tadjalli Mehr and Linus Backert and Greg Finak and Raphael Gottardo and Mihai Alexandru Bica and Aurelio Garofano and Sven Dominik Koch and Mariola Fotin-Mleczek and Ingmar Hoerr and Ralf Clemens and Frank von Sonnenburg}, url = {http://linkinghub.elsevier.com/retrieve/pii/S0140673617316653}, year = {2017}, date = {2017-01-01}, journal = {The Lancet}, volume = {390}, number = {10101}, pages = {1511-1520}, abstract = {BACKGROUND: Vaccines based on mRNA coding for antigens have been shown to be safe and immunogenic in preclinical models. We aimed to report results of the first-in-human proof-of-concept clinical trial in healthy adults of a prophylactic mRNA-based vaccine encoding rabies virus glycoprotein (CV7201). METHODS: We did an open-label, uncontrolled, prospective, phase 1 clinical trial at one centre in Munich, Germany. Healthy male and female volunteers (aged 18-40 years) with no history of rabies vaccination were sequentially enrolled. They received three doses of CV7201 intradermally or intramuscularly by needle-syringe or one of three needle-free devices. Escalating doses were given to subsequent cohorts, and one cohort received a booster dose after 1 year. The primary endpoint was safety and tolerability. 
The secondary endpoint was to determine the lowest dose of CV7201 to elicit rabies virus neutralising titres equal to or greater than the WHO-specified protective antibody titre of 0·5 IU/mL. The study is continuing for long-term safety and immunogenicity follow-up. This trial is registered with ClinicalTrials.gov, number NCT02241135. FINDINGS: Between Oct 21, 2013, and Jan 11, 2016, we enrolled and vaccinated 101 participants with 306 doses of mRNA (80-640 μg) by needle-syringe (18 intradermally and 24 intramuscularly) or needle-free devices (46 intradermally and 13 intramuscularly). In the 7 days post vaccination, 60 (94%) of 64 intradermally vaccinated participants and 36 (97%) of 37 intramuscularly vaccinated participants reported solicited injection site reactions, and 50 (78%) of 64 intradermally vaccinated participants and 29 (78%) of 37 intramuscularly vaccinated participants reported solicited systemic adverse events, including ten grade 3 events. One unexpected, possibly related, serious adverse reaction that occurred 7 days after a 640 μg intramuscular dose resolved without sequelae. mRNA vaccination by needle-free intradermal or intramuscular device injection induced virus neutralising antibody titres of 0·5 IU/mL or more across dose levels and schedules in 32 (71%) of 45 participants given 80 μg or 160 μg CV7201 doses intradermally and six (46%) of 13 participants given 200 μg or 400 μg CV7201 doses intramuscularly. 1 year later, eight (57%) of 14 participants boosted with an 80 μg needle-free intradermal dose of CV7201 achieved titres of 0·5 IU/mL or more. Conversely, intradermal or intramuscular needle-syringe injection was ineffective, with only one participant (who received 320 μg intradermally) showing a detectable immune response. 
INTERPRETATION: This first-ever demonstration in human beings shows that a prophylactic mRNA-based candidate vaccine can induce boostable functional antibodies against a viral antigen when administered with a needle-free device, although not when injected by a needle-syringe. The vaccine was generally safe with a reasonable tolerability profile. FUNDING: CureVac AG.}, keywords = {}, pubstate = {published}, tppubtype = {article} } BACKGROUND: Vaccines based on mRNA coding for antigens have been shown to be safe and immunogenic in preclinical models. We aimed to report results of the first-in-human proof-of-concept clinical trial in healthy adults of a prophylactic mRNA-based vaccine encoding rabies virus glycoprotein (CV7201). METHODS: We did an open-label, uncontrolled, prospective, phase 1 clinical trial at one centre in Munich, Germany. Healthy male and female volunteers (aged 18-40 years) with no history of rabies vaccination were sequentially enrolled. They received three doses of CV7201 intradermally or intramuscularly by needle-syringe or one of three needle-free devices. Escalating doses were given to subsequent cohorts, and one cohort received a booster dose after 1 year. The primary endpoint was safety and tolerability. The secondary endpoint was to determine the lowest dose of CV7201 to elicit rabies virus neutralising titres equal to or greater than the WHO-specified protective antibody titre of 0·5 IU/mL. The study is continuing for long-term safety and immunogenicity follow-up. This trial is registered with ClinicalTrials.gov, number NCT02241135. FINDINGS: Between Oct 21, 2013, and Jan 11, 2016, we enrolled and vaccinated 101 participants with 306 doses of mRNA (80-640 μg) by needle-syringe (18 intradermally and 24 intramuscularly) or needle-free devices (46 intradermally and 13 intramuscularly). 
In the 7 days post vaccination, 60 (94%) of 64 intradermally vaccinated participants and 36 (97%) of 37 intramuscularly vaccinated participants reported solicited injection site reactions, and 50 (78%) of 64 intradermally vaccinated participants and 29 (78%) of 37 intramuscularly vaccinated participants reported solicited systemic adverse events, including ten grade 3 events. One unexpected, possibly related, serious adverse reaction that occurred 7 days after a 640 μg intramuscular dose resolved without sequelae. mRNA vaccination by needle-free intradermal or intramuscular device injection induced virus neutralising antibody titres of 0·5 IU/mL or more across dose levels and schedules in 32 (71%) of 45 participants given 80 μg or 160 μg CV7201 doses intradermally and six (46%) of 13 participants given 200 μg or 400 μg CV7201 doses intramuscularly. 1 year later, eight (57%) of 14 participants boosted with an 80 μg needle-free intradermal dose of CV7201 achieved titres of 0·5 IU/mL or more. Conversely, intradermal or intramuscular needle-syringe injection was ineffective, with only one participant (who received 320 μg intradermally) showing a detectable immune response. INTERPRETATION: This first-ever demonstration in human beings shows that a prophylactic mRNA-based candidate vaccine can induce boostable functional antibodies against a viral antigen when administered with a needle-free device, although not when injected by a needle-syringe. The vaccine was generally safe with a reasonable tolerability profile. FUNDING: CureVac AG. |
van Rijswijk, M; Beirnaert, C; Caron, C; Cascante, M; Dominguez, V; Dunn, WB; Ebbels, TMD; Giacomoni, F; Gonzalez-Beltran, A; Hankemeier, T; Haug, K; Izquierdo-Garcia, JL; Jimenez, RC; Jourdan, F; Kale, N; Klapa, MI; Kohlbacher, O; Koort, K; Kultima, K; Le Corguillé, G; Moschonas, NK; Neumann, S; O'Donovan, C; Reczko, M; Rocca-Serra, P; Rosato, A; Salek, RM; Sansone, SA; Satagopam, V; Schober, D; Shimmo, R; Spicer, RA; Spjuth, O; Thévenot, EA; Viant, MR; Weber, RJM; Willighagen, EL; Zanetti, G; Steinbeck, C The future of metabolomics in ELIXIR [version 1; referees: awaiting peer review] F1000Research, 6 (1649), 2017. @article{10.12688-f1000research.12342.2, title = {The future of metabolomics in ELIXIR [version 1; referees: awaiting peer review]}, author = {M van Rijswijk and C Beirnaert and C Caron and M Cascante and V Dominguez and WB Dunn and TMD Ebbels and F Giacomoni and A Gonzalez-Beltran and T Hankemeier and K Haug and JL Izquierdo-Garcia and RC Jimenez and F Jourdan and N Kale and MI Klapa and O Kohlbacher and K Koort and K Kultima and G Le Corguillé and NK Moschonas and S Neumann and C O'Donovan and M Reczko and P Rocca-Serra and A Rosato and RM Salek and SA Sansone and V Satagopam and D Schober and R Shimmo and RA Spicer and O Spjuth and EA Thévenot and MR Viant and RJM Weber and EL Willighagen and G Zanetti and C Steinbeck}, url = {https://doi.org/10.12688/f1000research.12342.2}, year = {2017}, date = {2017-01-01}, journal = {F1000Research}, volume = {6}, number = {1649}, keywords = {}, pubstate = {published}, tppubtype = {article} } |