Publications
Sachsenberg, Timo; Herbst, Florian-Alexander; Taubert, Martin; Kermer, René; Jehmlich, Nico; von Bergen, Martin; Seifert, Jana; Kohlbacher, Oliver MetaProSIP: automated inference of stable isotope incorporation rates in proteins for functional metaproteomics J. Proteome Res., 14 (2), pp. 619-27, 2015. @article{MetaProSIP_2015, title = {MetaProSIP: automated inference of stable isotope incorporation rates in proteins for functional metaproteomics}, author = {Timo Sachsenberg and Florian-Alexander Herbst and Martin Taubert and René Kermer and Nico Jehmlich and Martin von Bergen and Jana Seifert and Oliver Kohlbacher}, url = {https://pubs.acs.org/doi/abs/10.1021/pr500245w}, year = {2015}, date = {2015-01-01}, journal = {J. Proteome Res.}, volume = {14}, number = {2}, pages = {619-27}, abstract = {We propose a joint experimental and theoretical approach to the automated reconstruction of elemental fluxes in microbial communities. While stable isotope probing of proteins (protein-SIP) has been successfully applied to study interactions and elemental carbon and nitrogen fluxes, the volume and complexity of mass spectrometric data in protein-SIP experiments poses new challenges for data analysis. Together with a flexible experimental setup, the novel bioinformatics tool MetaProSIP offers an automated high-throughput solution for a wide range of 13C or 15N protein-SIP experiments with special emphasis on the analysis of metaproteomic experiments where differential labeling of organisms can occur. The information calculated in MetaProSIP includes the determination of multiple relative isotopic abundances, the labeling ratio between old and new synthesized proteins and the shape of the isotopic distribution. These parameters define the metabolic capacities and dynamics within the investigated microbial culture. MetaProSIP features a high degree of reproducibility, reliability, and quality control reporting. 
Embedding into the OpenMS framework allows for flexible construction of custom-tailored workflows. Software and documentation are available under an open-source license at www.openms.de/MetaProSIP.}, keywords = {}, pubstate = {published}, tppubtype = {article} } We propose a joint experimental and theoretical approach to the automated reconstruction of elemental fluxes in microbial communities. While stable isotope probing of proteins (protein-SIP) has been successfully applied to study interactions and elemental carbon and nitrogen fluxes, the volume and complexity of mass spectrometric data in protein-SIP experiments poses new challenges for data analysis. Together with a flexible experimental setup, the novel bioinformatics tool MetaProSIP offers an automated high-throughput solution for a wide range of 13C or 15N protein-SIP experiments with special emphasis on the analysis of metaproteomic experiments where differential labeling of organisms can occur. The information calculated in MetaProSIP includes the determination of multiple relative isotopic abundances, the labeling ratio between old and new synthesized proteins and the shape of the isotopic distribution. These parameters define the metabolic capacities and dynamics within the investigated microbial culture. MetaProSIP features a high degree of reproducibility, reliability, and quality control reporting. Embedding into the OpenMS framework allows for flexible construction of custom-tailored workflows. Software and documentation are available under an open-source license at www.openms.de/MetaProSIP. |
Gerasch, Andreas; Küntzer, Jan; Niermann, Peter; Stöckel, Daniel; Kohlbacher, Oliver; Lenhof, Hans-Peter Network-based interactive navigation and analysis of large biological datasets it - Information Technology, 57 (1), pp. 37-48, 2015. @article{itITVisAnalytics2015, title = {Network-based interactive navigation and analysis of large biological datasets}, author = {Andreas Gerasch and Jan Küntzer and Peter Niermann and Daniel Stöckel and Oliver Kohlbacher and Hans-Peter Lenhof}, doi = {https://doi.org/10.1515/itit-2014-1076}, year = {2015}, date = {2015-01-01}, journal = {it - Information Technology}, volume = {57}, number = {1}, pages = {37-48}, abstract = {Over the last decade, advances in high-throughput technologies have resulted in a flood of new biological data. Here, individual samples can extend up into terabyte size. While potential applications are broad, ranging from biotechnology to medical applications, the analysis of these datasets poses massive challenges. In order to make use of the produced terabytes of data, these datasets need to be integrated, need to be mapped onto existing biological knowledge, and need to be explored by experts. We present UniPAX and BiNA, a scalable system for the integration and analysis of high-throughput data (genomics, transcriptomics, proteomics, and metabolomics) in a network context. A central data warehouse holds the core dataset. A flexible middleware can execute custom queries on these datasets and communicate with our visual analytics tool BiNA, the Biological Network Analyzer. We demonstrate how the combination of these tools permits an efficient analysis of large-scale datasets for medical applications.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Over the last decade, advances in high-throughput technologies have resulted in a flood of new biological data. Here, individual samples can extend up into terabyte size. 
While potential applications are broad, ranging from biotechnology to medical applications, the analysis of these datasets poses massive challenges. In order to make use of the produced terabytes of data, these datasets need to be integrated, need to be mapped onto existing biological knowledge, and need to be explored by experts. We present UniPAX and BiNA, a scalable system for the integration and analysis of high-throughput data (genomics, transcriptomics, proteomics, and metabolomics) in a network context. A central data warehouse holds the core dataset. A flexible middleware can execute custom queries on these datasets and communicate with our visual analytics tool BiNA, the Biological Network Analyzer. We demonstrate how the combination of these tools permits an efficient analysis of large-scale datasets for medical applications. |
Ziller, Michael J; Edri, Reuven; Yaffe, Yakey; Donaghey, Julie; Pop, Ramona; Mallard, William; Issner, Robbyn; Gifford, Casey A; Goren, Alon; Xing, Jeffrey; Gu, Hongcang; Cacchiarelli, Davide; Tsankov, Alexander M; Epstein, Charles; Rinn, John L; Mikkelsen, Tarjei S; Kohlbacher, Oliver; Gnirke, Andreas; Bernstein, Bradley E; Elkabetz, Yechiel; Meissner, Alexander Dissecting neural differentiation regulatory networks through epigenetic footprinting. Nature, 518 , pp. 355-359, 2015. @article{Ziller2015, title = {Dissecting neural differentiation regulatory networks through epigenetic footprinting.}, author = {Michael J Ziller and Reuven Edri and Yakey Yaffe and Julie Donaghey and Ramona Pop and William Mallard and Robbyn Issner and Casey A Gifford and Alon Goren and Jeffrey Xing and Hongcang Gu and Davide Cacchiarelli and Alexander M Tsankov and Charles Epstein and John L Rinn and Tarjei S Mikkelsen and Oliver Kohlbacher and Andreas Gnirke and Bradley E Bernstein and Yechiel Elkabetz and Alexander Meissner}, url = {http://dx.doi.org/10.1038/nature13990}, year = {2015}, date = {2015-01-01}, journal = {Nature}, volume = {518}, pages = {355-359}, abstract = {Models derived from human pluripotent stem cells that accurately recapitulate neural development in vitro and allow for the generation of specific neuronal subtypes are of major interest to the stem cell and biomedical community. Notch signalling, particularly through the Notch effector HES5, is a major pathway critical for the onset and maintenance of neural progenitor cells in the embryonic and adult nervous system. Here we report the transcriptional and epigenomic analysis of six consecutive neural progenitor cell stages derived from a HES5::eGFP reporter human embryonic stem cell line. Using this system, we aimed to model cell-fate decisions including specification, expansion and patterning during the ontogeny of cortical neural stem and progenitor cells. 
In order to dissect regulatory mechanisms that orchestrate the stage-specific differentiation process, we developed a computational framework to infer key regulators of each cell-state transition based on the progressive remodelling of the epigenetic landscape and then validated these through a pooled short hairpin RNA screen. We were also able to refine our previous observations on epigenetic priming at transcription factor binding sites and suggest here that they are mediated by combinations of core and stage-specific factors. Taken together, we demonstrate the utility of our system and outline a general framework, not limited to the context of the neural lineage, to dissect regulatory circuits of differentiation.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Models derived from human pluripotent stem cells that accurately recapitulate neural development in vitro and allow for the generation of specific neuronal subtypes are of major interest to the stem cell and biomedical community. Notch signalling, particularly through the Notch effector HES5, is a major pathway critical for the onset and maintenance of neural progenitor cells in the embryonic and adult nervous system. Here we report the transcriptional and epigenomic analysis of six consecutive neural progenitor cell stages derived from a HES5::eGFP reporter human embryonic stem cell line. Using this system, we aimed to model cell-fate decisions including specification, expansion and patterning during the ontogeny of cortical neural stem and progenitor cells. In order to dissect regulatory mechanisms that orchestrate the stage-specific differentiation process, we developed a computational framework to infer key regulators of each cell-state transition based on the progressive remodelling of the epigenetic landscape and then validated these through a pooled short hairpin RNA screen. 
We were also able to refine our previous observations on epigenetic priming at transcription factor binding sites and suggest here that they are mediated by combinations of core and stage-specific factors. Taken together, we demonstrate the utility of our system and outline a general framework, not limited to the context of the neural lineage, to dissect regulatory circuits of differentiation. |
Aiche, Stephan; Sachsenberg, Timo; Kenar, Erhan; Walzer, Mathias; Wiswedel, Bernd; Kristl, Theresa; Boyles, Matthew; Duschl, Albert; Huber, Christian; Berthold, Michael; Reinert, Knut; Kohlbacher, Oliver Workflows for automated downstream data analysis and visualization in large-scale computational mass spectrometry Proteomics, 15 (8), pp. 1443-7, 2015. @article{ProtWFVIs2015, title = {Workflows for automated downstream data analysis and visualization in large-scale computational mass spectrometry}, author = {Stephan Aiche and Timo Sachsenberg and Erhan Kenar and Mathias Walzer and Bernd Wiswedel and Theresa Kristl and Matthew Boyles and Albert Duschl and Christian Huber and Michael Berthold and Knut Reinert and Oliver Kohlbacher}, doi = {https://doi.org/10.1002/pmic.201400391}, year = {2015}, date = {2015-01-01}, journal = {Proteomics}, volume = {15}, number = {8}, pages = {1443-7}, abstract = {Mass spectrometry-based proteomics and metabolomics are rapidly evolving research fields driven by the development of novel instruments, experimental approaches, and analysis methods. Monolithic analysis tools perform well on single tasks but lack the flexibility to cope with the constantly changing requirements and experimental setups. Workflow systems, which combine small processing tools into complex analysis pipelines, allow custom-tailored and flexible data processing workflows that can be published or shared with collaborators. In this article, we present the integration of established tools for computational mass spectrometry from the open-source software framework OpenMS into the workflow engine KNIME for the analysis of large data sets and production of high quality visualizations. 
We provide example workflows to demonstrate combined data processing and visualization for three diverse tasks in computational mass spectrometry: isobaric mass tag based quantitation in complex experimental setups, label-free quantitation and identification of metabolites, and quality control for proteomics experiments.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Mass spectrometry-based proteomics and metabolomics are rapidly evolving research fields driven by the development of novel instruments, experimental approaches, and analysis methods. Monolithic analysis tools perform well on single tasks but lack the flexibility to cope with the constantly changing requirements and experimental setups. Workflow systems, which combine small processing tools into complex analysis pipelines, allow custom-tailored and flexible data processing workflows that can be published or shared with collaborators. In this article, we present the integration of established tools for computational mass spectrometry from the open-source software framework OpenMS into the workflow engine KNIME for the analysis of large data sets and production of high quality visualizations. We provide example workflows to demonstrate combined data processing and visualization for three diverse tasks in computational mass spectrometry: isobaric mass tag based quantitation in complex experimental setups, label-free quantitation and identification of metabolites, and quality control for proteomics experiments. |
Schubert, Benjamin; Brachvogel, Hans-Philipp; Jürges, Christopher; Kohlbacher, Oliver EpiToolKit – A Web-based Workbench for Vaccine Design Bioinformatics, 31 (13), pp. 2211-3, 2015. @article{ETK2015, title = {EpiToolKit – A Web-based Workbench for Vaccine Design}, author = {Benjamin Schubert and Hans-Philipp Brachvogel and Christopher Jürges and Oliver Kohlbacher}, doi = {https://doi.org/10.1093/bioinformatics/btv116}, year = {2015}, date = {2015-01-01}, journal = {Bioinformatics}, volume = {31}, number = {13}, pages = {2211-3}, abstract = {EpiToolKit is a virtual workbench for immunological questions with a focus on vaccine design. It offers an array of immunoinformatics tools covering MHC genotyping, epitope and neo-epitope prediction, epitope selection for vaccine design, and epitope assembly. In its recently re-implemented version 2.0, EpiToolKit provides a range of new functionality and for the first time allows combining tools into complex workflows. For inexperienced users it offers simplified interfaces to guide the users through the analysis of complex immunological data sets.}, keywords = {}, pubstate = {published}, tppubtype = {article} } EpiToolKit is a virtual workbench for immunological questions with a focus on vaccine design. It offers an array of immunoinformatics tools covering MHC genotyping, epitope and neo-epitope prediction, epitope selection for vaccine design, and epitope assembly. In its recently re-implemented version 2.0, EpiToolKit provides a range of new functionality and for the first time allows combining tools into complex workflows. For inexperienced users it offers simplified interfaces to guide the users through the analysis of complex immunological data sets. |
Friedrich, Andreas; Kenar, Erhan; Kohlbacher, Oliver; Nahnsen, Sven Intuitive Web-based Experimental Design for High-throughput Biomedical Data BioMed Res Int, 2015 , pp. 958302, 2015. @article{articlereference.2015-03-10.0971887022, title = {Intuitive Web-based Experimental Design for High-throughput Biomedical Data}, author = {Andreas Friedrich and Erhan Kenar and Oliver Kohlbacher and Sven Nahnsen}, doi = {https://doi.org/10.1155/2015/958302}, year = {2015}, date = {2015-01-01}, journal = {BioMed Res Int}, volume = {2015}, pages = {958302}, abstract = {Big data bioinformatics aims at drawing biological conclusions from huge and complex biological datasets. Added value from the analysis of big data, however, is only possible if the data is accompanied by accurate metadata annotation. Particularly in high-throughput experiments intelligent approaches are needed to keep track of the experimental design, including the conditions that are studied as well as information that might be interesting for failure analysis or further experiments in the future. In addition to the management of this information, means for an integrated design and interfaces for structured data annotation are urgently needed by researchers. Here, we propose a factor-based experimental design approach that enables scientists to easily create large-scale experiments with the help of a web-based system. We present a novel implementation of a web-based interface allowing the collection of arbitrary metadata. To exchange and edit information we provide a spreadsheet-based, humanly readable format. Subsequently, sample sheets with identifiers and metainformation for data generation facilities can be created. 
Data files created after measurement of the samples can be uploaded to a datastore, where they are automatically linked to the previously created experimental design model.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Big data bioinformatics aims at drawing biological conclusions from huge and complex biological datasets. Added value from the analysis of big data, however, is only possible if the data is accompanied by accurate metadata annotation. Particularly in high-throughput experiments intelligent approaches are needed to keep track of the experimental design, including the conditions that are studied as well as information that might be interesting for failure analysis or further experiments in the future. In addition to the management of this information, means for an integrated design and interfaces for structured data annotation are urgently needed by researchers. Here, we propose a factor-based experimental design approach that enables scientists to easily create large-scale experiments with the help of a web-based system. We present a novel implementation of a web-based interface allowing the collection of arbitrary metadata. To exchange and edit information we provide a spreadsheet-based, humanly readable format. Subsequently, sample sheets with identifiers and metainformation for data generation facilities can be created. Data files created after measurement of the samples can be uploaded to a datastore, where they are automatically linked to the previously created experimental design model. |
Martens, Lennart; Kohlbacher, Oliver; Weintraub, Susan T Managing Expectations when Publishing Tools and Methods for Computational Proteomics J. Proteome Res., 14 (5), pp. 2002-4, 2015. @article{JPR_Expectations_2015, title = {Managing Expectations when Publishing Tools and Methods for Computational Proteomics}, author = {Lennart Martens and Oliver Kohlbacher and Susan T Weintraub}, url = {http://pubs.acs.org/doi/abs/10.1021/pr501318d}, year = {2015}, date = {2015-01-01}, journal = {J. Proteome Res.}, volume = {14}, number = {5}, pages = {2002-4}, abstract = {Computational tools are pivotal in proteomics because they are crucial for identification, quantification, and statistical assessment of data. The gateway to finding the best choice of a tool or approach for a particular problem is frequently journal articles. Yet, there is often an overwhelming variety of options that makes it hard to decide on the best solution. This is particularly difficult for non-experts in bioinformatics. The maturity, reliability, and performance of tools can vary widely, since publications may appear at different stages of development. A novel idea might merit early publication despite only offering proof-of-principle, while it may take years before a tool can be considered mature, and by that time it might be difficult for a new publication to be accepted because of a perceived lack of novelty. After discussions with members of the computational mass spectrometry community, we describe here proposed recommendations for organization of informatics manuscripts as a way to set the expectations of readers (and reviewers) through three different manuscript types that are based on existing journal designations. Brief Communications are short reports describing novel computational approaches where the implementation is not necessarily production-ready. Research Articles present both a novel idea and mature implementation that has been suitably benchmarked. 
Application Notes focus on a mature and tested tool or concept, and need not be novel but should offer advancement from improved quality, ease of use and/or implementation. Organizing computational proteomics contributions into these three manuscript types will facilitate the review process and will also enable readers to identify the maturity and applicability of the tool for their own workflows.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Computational tools are pivotal in proteomics because they are crucial for identification, quantification, and statistical assessment of data. The gateway to finding the best choice of a tool or approach for a particular problem is frequently journal articles. Yet, there is often an overwhelming variety of options that makes it hard to decide on the best solution. This is particularly difficult for non-experts in bioinformatics. The maturity, reliability, and performance of tools can vary widely, since publications may appear at different stages of development. A novel idea might merit early publication despite only offering proof-of-principle, while it may take years before a tool can be considered mature, and by that time it might be difficult for a new publication to be accepted because of a perceived lack of novelty. After discussions with members of the computational mass spectrometry community, we describe here proposed recommendations for organization of informatics manuscripts as a way to set the expectations of readers (and reviewers) through three different manuscript types that are based on existing journal designations. Brief Communications are short reports describing novel computational approaches where the implementation is not necessarily production-ready. Research Articles present both a novel idea and mature implementation that has been suitably benchmarked. 
Application Notes focus on a mature and tested tool or concept, and need not be novel but should offer advancement from improved quality, ease of use and/or implementation. Organizing computational proteomics contributions into these three manuscript types will facilitate the review process and will also enable readers to identify the maturity and applicability of the tool for their own workflows. |
Uszkoreit, Julian; Maerkens, Alexandra; Perez-Riverol, Yasset; Meyer, Helmut E; Marcus, Katrin; Stephan, Christian; Kohlbacher, Oliver; Eisenacher, Martin PIA - An intuitive protein inference engine with a web-based user interface J. Proteome Res., 14 (7), pp. 2988-97, 2015. @article{PIA_JPR_2015, title = {PIA - An intuitive protein inference engine with a web-based user interface}, author = {Julian Uszkoreit and Alexandra Maerkens and Yasset Perez-Riverol and Helmut E Meyer and Katrin Marcus and Christian Stephan and Oliver Kohlbacher and Martin Eisenacher}, doi = {https://doi.org/10.1021/acs.jproteome.5b00121}, year = {2015}, date = {2015-01-01}, journal = {J. Proteome Res.}, volume = {14}, number = {7}, pages = {2988-97}, abstract = {Protein inference connects the peptide-spectrum matches (PSMs) obtained from database search engines back to proteins, which are typically at the heart of most proteomics studies. Different search engines yield different PSMs and thus different protein lists. Analysis of results from one or multiple search engines is often hampered by different data exchange formats and lack of convenient and intuitive user interfaces. We present PIA, a flexible software suite for combining PSMs from different search engine runs and turning these into consistent results. PIA can be integrated into proteomics data analysis workflows in several ways. A user-friendly graphical user interface can either be run locally or (e.g., for larger core facilities) from a central server. For automated data processing, stand-alone tools are available. PIA implements several established protein inference algorithms and can combine results from different search engines seamlessly. On several benchmark datasets we can show that PIA can identify a larger number of proteins at the same protein FDR when compared to inference based on a single search engine. PIA supports the majority of established search engines and data in the mzIdentML standard format. 
It is implemented in Java and freely available from https://github.com/mpc-bioinformatics/pia.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Protein inference connects the peptide-spectrum matches (PSMs) obtained from database search engines back to proteins, which are typically at the heart of most proteomics studies. Different search engines yield different PSMs and thus different protein lists. Analysis of results from one or multiple search engines is often hampered by different data exchange formats and lack of convenient and intuitive user interfaces. We present PIA, a flexible software suite for combining PSMs from different search engine runs and turning these into consistent results. PIA can be integrated into proteomics data analysis workflows in several ways. A user-friendly graphical user interface can either be run locally or (e.g., for larger core facilities) from a central server. For automated data processing, stand-alone tools are available. PIA implements several established protein inference algorithms and can combine results from different search engines seamlessly. On several benchmark datasets we can show that PIA can identify a larger number of proteins at the same protein FDR when compared to inference based on a single search engine. PIA supports the majority of established search engines and data in the mzIdentML standard format. It is implemented in Java and freely available from https://github.com/mpc-bioinformatics/pia. |
Sharma, Kundan; Hrle, Ajla; Kramer, Katharina; Sachsenberg, Timo; Staals, Raymond H J; Randau, Lennart; Marchfelder, Anita; van der Oost, John; Kohlbacher, Oliver; Conti, Elena; Urlaub, Henning Analysis of protein-RNA interactions in CRISPR proteins and effector complexes by UV-induced cross-linking and mass spectrometry Methods, pp. S1046-2023(15)00246-7, 2015. @article{RNPxlMethods, title = {Analysis of protein-RNA interactions in CRISPR proteins and effector complexes by UV-induced cross-linking and mass spectrometry}, author = {Kundan Sharma and Ajla Hrle and Katharina Kramer and Timo Sachsenberg and Raymond H J Staals and Lennart Randau and Anita Marchfelder and John van der Oost and Oliver Kohlbacher and Elena Conti and Henning Urlaub}, doi = {https://doi.org/10.1016/j.ymeth.2015.06.005}, year = {2015}, date = {2015-01-01}, journal = {Methods}, pages = {S1046-2023(15)00246-7}, abstract = {Ribonucleoprotein (RNP) complexes play important roles in the cell by mediating basic cellular processes, including gene expression and its regulation. Understanding the molecular details of these processes requires the identification and characterization of protein-RNA interactions. Over the years various approaches have been used to investigate these interactions, including computational analyses to look for RNA binding domains, gel-shift mobility assays on recombinant and mutant proteins as well as co-crystallization and NMR studies for structure elucidation. Here we report a more specialized and direct approach using UV-induced cross-linking coupled with mass spectrometry. This approach permits the identification of cross-linked peptides and RNA moieties and can also pin-point exact RNA contact sites within the protein. 
The power of this method is illustrated by the application to different single- and multi-subunit RNP complexes belonging to the prokaryotic adaptive immune system, CRISPR-Cas (CRISPR: clustered regularly interspaced short palindromic repeats; Cas: CRISPR associated). In particular, we identified the RNA-binding sites within three Cas7 protein homologs and mapped the cross-linking results to reveal structurally conserved Cas7 - RNA binding interfaces. These results demonstrate the strong potential of UV-induced cross-linking coupled with mass spectrometry analysis to identify RNA interaction sites on the RNA binding proteins.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Ribonucleoprotein (RNP) complexes play important roles in the cell by mediating basic cellular processes, including gene expression and its regulation. Understanding the molecular details of these processes requires the identification and characterization of protein-RNA interactions. Over the years various approaches have been used to investigate these interactions, including computational analyses to look for RNA binding domains, gel-shift mobility assays on recombinant and mutant proteins as well as co-crystallization and NMR studies for structure elucidation. Here we report a more specialized and direct approach using UV-induced cross-linking coupled with mass spectrometry. This approach permits the identification of cross-linked peptides and RNA moieties and can also pin-point exact RNA contact sites within the protein. The power of this method is illustrated by the application to different single- and multi-subunit RNP complexes belonging to the prokaryotic adaptive immune system, CRISPR-Cas (CRISPR: clustered regularly interspaced short palindromic repeats; Cas: CRISPR associated). In particular, we identified the RNA-binding sites within three Cas7 protein homologs and mapped the cross-linking results to reveal structurally conserved Cas7 - RNA binding interfaces. 
These results demonstrate the strong potential of UV-induced cross-linking coupled with mass spectrometry analysis to identify RNA interaction sites on the RNA-binding proteins. |
Ranninger, Christina; Rurik, Marc; Limonciel, Alice; Ruzek, Silke; Reischl, Roland; Wilmes, Anja; Jennings, Paul; Hewitt, Philip; Dekant, Wolfgang; Kohlbacher, Oliver; Huber, Christian G Nephron Toxicity Profiling via Untargeted Metabolome Analysis Employing a High-Performance Liquid Chromatography-Mass Spectrometry-Based Experimental and Computational Pipeline J. Biol. Chem., 290 (31), pp. 19121-32, 2015. @article{JBCNephronTox2015, title = {Nephron Toxicity Profiling via Untargeted Metabolome Analysis Employing a High-Performance Liquid Chromatography-Mass Spectrometry-Based Experimental and Computational Pipeline}, author = {Christina Ranninger and Marc Rurik and Alice Limonciel and Silke Ruzek and Roland Reischl and Anja Wilmes and Paul Jennings and Philip Hewitt and Wolfgang Dekant and Oliver Kohlbacher and Christian G Huber}, doi = {https://doi.org/10.1074/jbc.M115.644146}, year = {2015}, date = {2015-01-01}, journal = {J. Biol. Chem.}, volume = {290}, number = {31}, pages = {19121-32}, abstract = {Untargeted metabolomics has the potential to improve the predictivity of in vitro toxicity models and therefore may aid the replacement of expensive and laborious animal models. Here we describe a long-term repeat dose nephrotoxicity study conducted on the human renal proximal tubular epithelial cell line, RPTEC/TERT1, treated with 10 µmol.L-1 and 35 µmol.L-1 of chloroacetaldehyde - a metabolite of the anti-cancer drug ifosfamide. Our study outlines the establishment of an automated and easy to use untargeted metabolomics workflow for HPLC-HRMS data. Automated data analysis workflows based on open-source software (OpenMS, KNIME) enabled a comprehensive and reproducible analysis of the complex and voluminous metabolomics data produced by the profiling approach. Time- and concentration dependent responses were clearly evident in the metabolomic profiles. 
In order to obtain a more comprehensive picture of the mode of action, transcriptomics and proteomics data were also integrated. For toxicity profiling of chloroacetaldehyde, 428 and 317 metabolite features were detectable in positive and negative mode, respectively, after stringent removal of chemical noise and unstable signals. Changes upon treatment were explored using principal component analysis (PCA) and statistically significant differences were identified using linear models (LIMMA). The analysis revealed toxic effects only for the treatment with 35 µmol.L-1 for 3 and 14 days. The most regulated metabolites were glutathione and metabolites related to the oxidative stress response of the cells. These findings are corroborated by proteomics and transcriptomics data, which show, amongst others, an activation of the Nrf2 and ATF4 pathways.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Untargeted metabolomics has the potential to improve the predictivity of in vitro toxicity models and therefore may aid the replacement of expensive and laborious animal models. Here we describe a long-term repeat dose nephrotoxicity study conducted on the human renal proximal tubular epithelial cell line, RPTEC/TERT1, treated with 10 µmol.L-1 and 35 µmol.L-1 of chloroacetaldehyde - a metabolite of the anti-cancer drug ifosfamide. Our study outlines the establishment of an automated and easy to use untargeted metabolomics workflow for HPLC-HRMS data. Automated data analysis workflows based on open-source software (OpenMS, KNIME) enabled a comprehensive and reproducible analysis of the complex and voluminous metabolomics data produced by the profiling approach. Time- and concentration dependent responses were clearly evident in the metabolomic profiles. In order to obtain a more comprehensive picture of the mode of action, transcriptomics and proteomics data were also integrated. 
For toxicity profiling of chloroacetaldehyde, 428 and 317 metabolite features were detectable in positive and negative mode, respectively, after stringent removal of chemical noise and unstable signals. Changes upon treatment were explored using principal component analysis (PCA) and statistically significant differences were identified using linear models (LIMMA). The analysis revealed toxic effects only for the treatment with 35 µmol.L-1 for 3 and 14 days. The most regulated metabolites were glutathione and metabolites related to the oxidative stress response of the cells. These findings are corroborated by proteomics and transcriptomics data, which show, amongst others, an activation of the Nrf2 and ATF4 pathways. |
Simha, Ramanuja; Briesemeister, Sebastian; Kohlbacher, Oliver; Shatkay, Hagit Protein (Multi-)Location Prediction: Utilizing Interdependencies via a Generative Model Bioinformatics, 31 (12), pp. i365-i374, 2015. @article{GenModelISMB2015, title = {Protein (Multi-)Location Prediction: Utilizing Interdependencies via a Generative Model}, author = {Ramanuja Simha and Sebastian Briesemeister and Oliver Kohlbacher and Hagit Shatkay}, url = {http://bioinformatics.oxfordjournals.org/content/31/12/i365.abstract?sid=eeb20637-6479-4328-afe6-8916faadafd8}, year = {2015}, date = {2015-01-01}, journal = {Bioinformatics}, volume = {31}, number = {12}, pages = {i365-i374}, abstract = {Motivation: Proteins are responsible for a multitude of vital tasks in all living organisms. Given that a protein’s function and role are strongly related to its subcellular location, protein location prediction is an important research area. While proteins move from one location to another and can localize to multiple locations, most existing location prediction systems assign only a single location per protein. A few recent systems attempt to predict multiple locations for proteins, however, their performance leaves much room for improvement. Moreover, such systems do not capture dependencies among locations and usually consider locations as independent. We hypothesize that a multi-location predictor that captures location inter-dependencies can improve location predictions for proteins. Results: We introduce a probabilistic generative model for protein localization, and develop a system based on it—which we call MDLoc—that utilizes inter-dependencies among locations to predict multiple locations for proteins. The model captures location inter-dependencies using Bayesian networks and represents dependency between features and locations using a mixture model. We use iterative processes for learning model parameters and for estimating protein locations. 
We evaluate our classifier MDLoc, on a dataset of single- and multi-localized proteins derived from the DBMLoc dataset, which is the most comprehensive protein multi-localization dataset currently available. Our results, obtained by using MDLoc, significantly improve upon results obtained by an initial simpler classifier, as well as on results reported by other top systems. Availability and implementation: MDLoc is available at: http://www.eecis.udel.edu/∼compbio/mdloc.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Motivation: Proteins are responsible for a multitude of vital tasks in all living organisms. Given that a protein’s function and role are strongly related to its subcellular location, protein location prediction is an important research area. While proteins move from one location to another and can localize to multiple locations, most existing location prediction systems assign only a single location per protein. A few recent systems attempt to predict multiple locations for proteins, however, their performance leaves much room for improvement. Moreover, such systems do not capture dependencies among locations and usually consider locations as independent. We hypothesize that a multi-location predictor that captures location inter-dependencies can improve location predictions for proteins. Results: We introduce a probabilistic generative model for protein localization, and develop a system based on it—which we call MDLoc—that utilizes inter-dependencies among locations to predict multiple locations for proteins. The model captures location inter-dependencies using Bayesian networks and represents dependency between features and locations using a mixture model. We use iterative processes for learning model parameters and for estimating protein locations. 
We evaluate our classifier MDLoc, on a dataset of single- and multi-localized proteins derived from the DBMLoc dataset, which is the most comprehensive protein multi-localization dataset currently available. Our results, obtained by using MDLoc, significantly improve upon results obtained by an initial simpler classifier, as well as on results reported by other top systems. Availability and implementation: MDLoc is available at: http://www.eecis.udel.edu/∼compbio/mdloc. |
Walz, Simon; Stickel, Juliane Sarah; Kowalewski, Daniel Johannes; Schuster, Heiko; Weisel, Katja; Backert, Linus; Kahn, Stefan; Nelde, Annika; Stroh, Tatjana; Handel, Martina; Kohlbacher, Oliver; Kanz, Lothar; Salih, Helmut Rainer; Rammensee, Hans-Georg; Stevanovic, Stefan The antigenic landscape of multiple myeloma: mass spectrometry (re-)defines targets for T-cell based immunotherapy Blood, 126 (10), pp. 1203-13, 2015. @article{BloodMyeloma2015, title = {The antigenic landscape of multiple myeloma: mass spectrometry (re-)defines targets for T-cell based immunotherapy}, author = {Simon Walz and Juliane Sarah Stickel and Daniel Johannes Kowalewski and Heiko Schuster and Katja Weisel and Linus Backert and Stefan Kahn and Annika Nelde and Tatjana Stroh and Martina Handel and Oliver Kohlbacher and Lothar Kanz and Helmut Rainer Salih and Hans-Georg Rammensee and Stefan Stevanovic}, doi = {https://doi.org/10.1182/blood-2015-04-640532}, year = {2015}, date = {2015-01-01}, journal = {Blood}, volume = {126}, number = {10}, pages = {1203-13}, abstract = {Direct analysis of HLA presented antigens by mass spectrometry provides a comprehensive view on the antigenic landscape of different tissues/malignancies and enables the identification of novel, pathophysiologically relevant T-cell epitopes. Here we present a systematic and comparative study of the HLA class I and II presented, non-mutant antigenome of multiple myeloma (MM). Quantification of HLA surface expression revealed elevated HLA molecule counts on malignant plasma cells compared to normal B cells, excluding relevant HLA down-regulation in MM. Analyzing the presentation of established myeloma-associated T-cell antigens on the HLA ligandome level, we found a substantial proportion of antigens to be only infrequently presented on primary myelomas or to display suboptimal degrees of myeloma-specificity. 
However, unsupervised analysis of our extensive HLA ligand dataset delineated a panel of 58 highly specific myeloma-associated antigens -including MMSET- which are characterized by frequent and exclusive presentation on myeloma samples. Functional characterization of these target antigens revealed peptide-specific, pre-existing CD8+ T-cell responses exclusively in myeloma patients, which is indicative of pathophysiological relevance. Furthermore, in vitro priming experiments revealed that peptide-specific T-cell responses can be induced in response-naïve myeloma patients. Together, our results serve to guide antigen selection for T-cell based immunotherapy of MM.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Direct analysis of HLA presented antigens by mass spectrometry provides a comprehensive view on the antigenic landscape of different tissues/malignancies and enables the identification of novel, pathophysiologically relevant T-cell epitopes. Here we present a systematic and comparative study of the HLA class I and II presented, non-mutant antigenome of multiple myeloma (MM). Quantification of HLA surface expression revealed elevated HLA molecule counts on malignant plasma cells compared to normal B cells, excluding relevant HLA down-regulation in MM. Analyzing the presentation of established myeloma-associated T-cell antigens on the HLA ligandome level, we found a substantial proportion of antigens to be only infrequently presented on primary myelomas or to display suboptimal degrees of myeloma-specificity. However, unsupervised analysis of our extensive HLA ligand dataset delineated a panel of 58 highly specific myeloma-associated antigens -including MMSET- which are characterized by frequent and exclusive presentation on myeloma samples. Functional characterization of these target antigens revealed peptide-specific, pre-existing CD8+ T-cell responses exclusively in myeloma patients, which is indicative of pathophysiological relevance. 
Furthermore, in vitro priming experiments revealed that peptide-specific T-cell responses can be induced in response-naïve myeloma patients. Together, our results serve to guide antigen selection for T-cell based immunotherapy of MM. |
Aicheler, Fabian; Li, Jia; Lehmann, Rainer; Xu, Guowang; Kohlbacher, Oliver Retention Time Prediction Improves Identification in Non-Targeted Lipidomics Approaches Anal. Chem., 87 (15), pp. 7698-704, 2015. @article{LipidRT2015, title = {Retention Time Prediction Improves Identification in Non-Targeted Lipidomics Approaches}, author = {Fabian Aicheler and Jia Li and Rainer Lehmann and Guowang Xu and Oliver Kohlbacher}, doi = {https://doi.org/10.1021/acs.analchem.5b01139}, year = {2015}, date = {2015-01-01}, journal = {Anal. Chem.}, volume = {87}, number = {15}, pages = {7698-704}, abstract = {Identification of lipids in non-targeted lipidomics based on liquid-chromatography coupled to mass spectrometry (LC-MS) is still a major issue. While both accurate mass and fragment spectra contain valuable information, retention time (RT) information can be used to augment this data. We present a retention time model based on machine learning approaches which enables an improved assignment of lipid structures and automated annotation of lipidomics data. In contrast to common approaches we used a complex mixture of 201 lipids originating from fat tissue instead of a standard mixture to train a support vector regression (SVR) model including molecular structural features. The cross-validated model achieves correlation coefficients between predicted and experimental retention times of r = 0.989. Of note, as few as 50 reference lipids of different classes are sufficient to adapt to different chromatographic setups. Combining our retention time model with identification via accurate mass search (AMS) of lipids against the comprehensive LIPID MAPS database, retention time filtering can significantly reduce the rate of false positives in complex data sets like adipose tissue extracts. In our case, filtering with retention time information removed more than half of the potential identifications, while retaining 95 % of the correct identifications. 
Combination of high-precision retention time prediction and accurate mass can thus significantly narrow down the number of hypotheses to be assessed for lipid identification in complex lipid pattern like tissue profiles.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Identification of lipids in non-targeted lipidomics based on liquid-chromatography coupled to mass spectrometry (LC-MS) is still a major issue. While both accurate mass and fragment spectra contain valuable information, retention time (RT) information can be used to augment this data. We present a retention time model based on machine learning approaches which enables an improved assignment of lipid structures and automated annotation of lipidomics data. In contrast to common approaches we used a complex mixture of 201 lipids originating from fat tissue instead of a standard mixture to train a support vector regression (SVR) model including molecular structural features. The cross-validated model achieves correlation coefficients between predicted and experimental retention times of r = 0.989. Of note, as few as 50 reference lipids of different classes are sufficient to adapt to different chromatographic setups. Combining our retention time model with identification via accurate mass search (AMS) of lipids against the comprehensive LIPID MAPS database, retention time filtering can significantly reduce the rate of false positives in complex data sets like adipose tissue extracts. In our case, filtering with retention time information removed more than half of the potential identifications, while retaining 95 % of the correct identifications. Combination of high-precision retention time prediction and accurate mass can thus significantly narrow down the number of hypotheses to be assessed for lipid identification in complex lipid pattern like tissue profiles. |
de Lange, Orlando; Wolf, Christina; Thiel, Philipp; Krüger, Jens; Kleusch, Christian; Kohlbacher, Oliver; Lahaye, Thomas DNA-binding proteins from marine bacteria expand the sequence diversity of known TALE-like repeats Nucl. Acids Res, 43 (20), pp. 10065-80, 2015. @article{MOrTLs-NAR-2015, title = {DNA-binding proteins from marine bacteria expand the sequence diversity of known TALE-like repeats}, author = {Orlando de Lange and Christina Wolf and Philipp Thiel and Jens Krüger and Christian Kleusch and Oliver Kohlbacher and Thomas Lahaye}, url = {http://nar.oxfordjournals.org/content/early/2015/10/19/nar.gkv1053.abstract}, year = {2015}, date = {2015-01-01}, journal = {Nucl. Acids Res}, volume = {43}, number = {20}, pages = {10065-80}, abstract = {Transcription Activator-Like Effectors (TALEs) of Xanthomonas bacteria are programmable DNA binding proteins with unprecedented target specificity. Comparative studies into TALE repeat structure and function are hindered by the limited sequence variation among TALE repeats. More sequence-diverse TALE-like proteins are known from Ralstonia solanacearum (RipTALs) and Burkholderia rhizoxinica (Bats), but RipTAL and Bat repeats are conserved with those of TALEs around the DNA-binding residue. We study two novel marine-organism TALE-like proteins (MOrTL1 and MOrTL2), the first to date of non-terrestrial origin. We have assessed their DNA-binding properties and modelled repeat structures. We found that repeats from these proteins mediate sequence specific DNA binding conforming to the TALE code, despite low sequence similarity to TALE repeats, and with novel residues around the BSR. However, MOrTL1 repeats show greater sequence discriminating power than MOrTL2 repeats. Sequence alignments show that there are only three residues conserved between repeats of all TALE-like proteins including the two new additions. This conserved motif could prove useful as an identifier for future TALE-likes. 
Additionally, comparing MOrTL repeats with those of other TALE-likes suggests a common evolutionary origin for the TALEs, RipTALs and Bats.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Transcription Activator-Like Effectors (TALEs) of Xanthomonas bacteria are programmable DNA binding proteins with unprecedented target specificity. Comparative studies into TALE repeat structure and function are hindered by the limited sequence variation among TALE repeats. More sequence-diverse TALE-like proteins are known from Ralstonia solanacearum (RipTALs) and Burkholderia rhizoxinica (Bats), but RipTAL and Bat repeats are conserved with those of TALEs around the DNA-binding residue. We study two novel marine-organism TALE-like proteins (MOrTL1 and MOrTL2), the first to date of non-terrestrial origin. We have assessed their DNA-binding properties and modelled repeat structures. We found that repeats from these proteins mediate sequence specific DNA binding conforming to the TALE code, despite low sequence similarity to TALE repeats, and with novel residues around the BSR. However, MOrTL1 repeats show greater sequence discriminating power than MOrTL2 repeats. Sequence alignments show that there are only three residues conserved between repeats of all TALE-like proteins including the two new additions. This conserved motif could prove useful as an identifier for future TALE-likes. Additionally, comparing MOrTL repeats with those of other TALE-likes suggests a common evolutionary origin for the TALEs, RipTALs and Bats. |
Venturelli, Sascha; Belz, Regina G; Kämper, Andreas; Berger, Alexandra; von Horn, Kyra; Wegner, André; Böcker, Alexander; Zabulon, Gérald; Langenecker, Tobias; Kohlbacher, Oliver; Barneche, Fredy; Weigel, Detlef; Lauer, Ulrich M; Bitzer, Michael; Becker, Claude Plants release precursors of potent histone deacetylase inhibitors to suppress growth of competitors Plant Cell, 27 (11), pp. 3175-89., 2015. @article{AllelopathyPlantCell, title = {Plants release precursors of potent histone deacetylase inhibitors to suppress growth of competitors}, author = {Sascha Venturelli and Regina G Belz and Andreas Kämper and Alexandra Berger and Kyra von Horn and André Wegner and Alexander Böcker and Gérald Zabulon and Tobias Langenecker and Oliver Kohlbacher and Fredy Barneche and Detlef Weigel and Ulrich M Lauer and Michael Bitzer and Claude Becker}, doi = {https://doi.org/10.1105/tpc.15.00585}, year = {2015}, date = {2015-01-01}, journal = {Plant Cell}, volume = {27}, number = {11}, pages = {3175-89.}, abstract = {To secure their access to water, light and nutrients, many plant species have developed allelopathic strategies to suppress competitors. To this end, they release phytotoxic substances that inhibit the germination and growth of neighbors. Despite the importance of allelopathy in shaping natural plant communities and for agricultural production, the underlying molecular mechanisms are largely unknown. Here we report that allelochemicals derived from the common class of hydroxamic acid root exudates directly affect the chromatin remodeling machinery in the target plant. These substances act as inhibitors of histone deacetylases both in vitro and in vivo and exert their activity through locus-specific alterations of histone acetylation and associated gene expression. 
Our data thus collectively show how plant-plant interactions interfere with a fundamental cellular process, histone acetylation, by targeting an evolutionarily highly conserved class of enzymes.}, keywords = {}, pubstate = {published}, tppubtype = {article} } To secure their access to water, light and nutrients, many plant species have developed allelopathic strategies to suppress competitors. To this end, they release phytotoxic substances that inhibit the germination and growth of neighbors. Despite the importance of allelopathy in shaping natural plant communities and for agricultural production, the underlying molecular mechanisms are largely unknown. Here we report that allelochemicals derived from the common class of hydroxamic acid root exudates directly affect the chromatin remodeling machinery in the target plant. These substances act as inhibitors of histone deacetylases both in vitro and in vivo and exert their activity through locus-specific alterations of histone acetylation and associated gene expression. Our data thus collectively show how plant-plant interactions interfere with a fundamental cellular process, histone acetylation, by targeting an evolutionarily highly conserved class of enzymes. |
Zaman, Uzma; Richter, Florian M; Hofele, Romina; Kramer, Katharina; Sachsenberg, Timo; Lenz, Christof; Urlaub, Henning Dithiothreitol (DTT) acts as a specific, UV-inducible cross-linker in elucidation of protein-RNA interactions Mol. Cell. Prot., 14 (12), pp. 3196-210, 2015. @article{DTTMCP2015, title = {Dithiothreitol (DTT) acts as a specific, UV-inducible cross-linker in elucidation of protein-RNA interactions}, author = {Uzma Zaman and Florian M Richter and Romina Hofele and Katharina Kramer and Timo Sachsenberg and Christof Lenz and Henning Urlaub}, doi = {https://doi.org/10.1074/mcp.M115.052795}, year = {2015}, date = {2015-01-01}, journal = {Mol. Cell. Prot.}, volume = {14}, number = {12}, pages = {3196-210}, abstract = {Protein-RNA cross-linking by UV irradiation at 254 nm wavelength has been established as an unbiased method to identify proteins in direct contact with RNA, and has been successfully applied to investigate the spatial arrangement of protein and RNA in large macromolecular assemblies, e.g. ribonucleoprotein particles (RNPs). The mass spectrometric analysis of such peptide-RNA cross-links provides high resolution structural data to the point of mapping protein-RNA interactions to specific peptides or even amino acids. However, the approach suffers from the low yield of cross-linking products, which can be addressed by improving enrichment and analysis methods. In the present paper, we introduce dithiothreitol (DTT) as a potent protein-RNA cross-linker. In order to evaluate the efficiency and specificity of DTT, we used two systems, a small synthetic peptide from smB protein incubated with U1 snRNA oligonucleotide and native ribonucleoprotein-complexes (RNPs) from S. cerevisiae. Our results unambiguously show that DTT covalently participates in cysteine-uracil crosslinks which is observable as a mass increment of 151.9966 Da, (C4H8S2O2) upon mass spectrometric analysis. 
DTT presents advantages for cross-linking of cysteine containing regions of proteins. This is evidenced by comparison to experiments where (tris(2-carboxyethyl)phosphine) (TCEP) is used as reducing agent, and significantly less cross-links encompassing cysteine residues are found. We further propose insertion of DTT between the cysteine and uracil reactive sites as the most probable structure of the cross-linking products.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Protein-RNA cross-linking by UV irradiation at 254 nm wavelength has been established as an unbiased method to identify proteins in direct contact with RNA, and has been successfully applied to investigate the spatial arrangement of protein and RNA in large macromolecular assemblies, e.g. ribonucleoprotein particles (RNPs). The mass spectrometric analysis of such peptide-RNA cross-links provides high resolution structural data to the point of mapping protein-RNA interactions to specific peptides or even amino acids. However, the approach suffers from the low yield of cross-linking products, which can be addressed by improving enrichment and analysis methods. In the present paper, we introduce dithiothreitol (DTT) as a potent protein-RNA cross-linker. In order to evaluate the efficiency and specificity of DTT, we used two systems, a small synthetic peptide from smB protein incubated with U1 snRNA oligonucleotide and native ribonucleoprotein-complexes (RNPs) from S. cerevisiae. Our results unambiguously show that DTT covalently participates in cysteine-uracil crosslinks which is observable as a mass increment of 151.9966 Da, (C4H8S2O2) upon mass spectrometric analysis. DTT presents advantages for cross-linking of cysteine containing regions of proteins. This is evidenced by comparison to experiments where (tris(2-carboxyethyl)phosphine) (TCEP) is used as reducing agent, and significantly less cross-links encompassing cysteine residues are found. 
We further propose insertion of DTT between the cysteine and uracil reactive sites as the most probable structure of the cross-linking products. |
Backert, Linus; Kohlbacher, Oliver Immunoinformatics and epitope prediction in the age of genomic medicine Genome Med., 7 (1), pp. 119, 2015. @article{GenomeMedReview2015, title = {Immunoinformatics and epitope prediction in the age of genomic medicine}, author = {Linus Backert and Oliver Kohlbacher}, doi = {https://doi.org/10.1186/s13073-015-0245-0}, year = {2015}, date = {2015-01-01}, journal = {Genome Med.}, volume = {7}, number = {1}, pages = {119}, abstract = {Immunoinformatics involves the application of computational methods to immunological problems. Prediction of B- and T-cell epitopes has long been the focus of immunoinformatics, given the potential translational implications, and many tools have been developed. With the advent of next-generation sequencing (NGS) methods, an unprecedented wealth of information has become available that requires more-advanced immunoinformatics tools. Based on information from whole-genome sequencing, exome sequencing and RNA sequencing, it is possible to characterize with high accuracy an individual's human leukocyte antigen (HLA) allotype (i.e., the individual set of HLA alleles of the patient), as well as changes arising in the HLA ligandome (the collection of peptides presented by the HLA) owing to genomic variation. This has allowed new opportunities for translational applications of epitope prediction, such as epitope-based design of prophylactic and therapeutic vaccines, and personalized cancer immunotherapies. Here, we review a wide range of immunoinformatics tools, with a focus on B- and T-cell epitope prediction. We also highlight fundamental differences in the underlying algorithms and discuss the various metrics employed to assess prediction quality, comparing their strengths and weaknesses. 
Finally, we discuss the new challenges and opportunities presented by high-throughput data-sets for the field of epitope prediction.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Immunoinformatics involves the application of computational methods to immunological problems. Prediction of B- and T-cell epitopes has long been the focus of immunoinformatics, given the potential translational implications, and many tools have been developed. With the advent of next-generation sequencing (NGS) methods, an unprecedented wealth of information has become available that requires more-advanced immunoinformatics tools. Based on information from whole-genome sequencing, exome sequencing and RNA sequencing, it is possible to characterize with high accuracy an individual's human leukocyte antigen (HLA) allotype (i.e., the individual set of HLA alleles of the patient), as well as changes arising in the HLA ligandome (the collection of peptides presented by the HLA) owing to genomic variation. This has allowed new opportunities for translational applications of epitope prediction, such as epitope-based design of prophylactic and therapeutic vaccines, and personalized cancer immunotherapies. Here, we review a wide range of immunoinformatics tools, with a focus on B- and T-cell epitope prediction. We also highlight fundamental differences in the underlying algorithms and discuss the various metrics employed to assess prediction quality, comparing their strengths and weaknesses. Finally, we discuss the new challenges and opportunities presented by high-throughput data-sets for the field of epitope prediction. |
Kowalewski, Daniel J; Schuster, Heiko; Backert, Linus; Berlin, Claudia; Kahn, Stefan; Kanz, Lothar; Salih, Helmut R; Rammensee, Hans-Georg; Stevanovic, Stefan; Stickel, Juliane Sarah HLA ligandome analysis identifies the underlying specificities of spontaneous antileukemia immune responses in chronic lymphocytic leukemia (CLL) Proceedings of the National Academy of Sciences of the United States of America, 112 (2), pp. E166–E175, 2015. @article{cite-key, title = {HLA ligandome analysis identifies the underlying specificities of spontaneous antileukemia immune responses in chronic lymphocytic leukemia (CLL)}, author = {Daniel J Kowalewski and Heiko Schuster and Linus Backert and Claudia Berlin and Stefan Kahn and Lothar Kanz and Helmut R Salih and Hans-Georg Rammensee and Stefan Stevanovic and Juliane Sarah Stickel}, url = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC4299203/}, year = {2015}, date = {2015-01-01}, journal = {Proceedings of the National Academy of Sciences of the United States of America}, volume = {112}, number = {2}, pages = {E166--E175}, abstract = {Effective cancer immunotherapy relies on specific immune recognition of tumor-associated and tumor-specific antigens. In chronic lymphocytic leukemia (CLL), the highly variable course of disease implicates an underlying mechanism of tumor control by the immune system. In this study, we directly analyzed the landscape of naturally presented CLL antigens and identified targets conveying immune protection. These novel antigens might be valuable both for patient stratification and for inducing therapeutic antitumor immunity. 
Taken together, we demonstrate that CLL is subject to spontaneous T-cell responses targeting nonmutated antigens, which are associated with improved patient survival and provide novel options for the immunotherapy of CLL.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Effective cancer immunotherapy relies on specific immune recognition of tumor-associated and tumor-specific antigens. In chronic lymphocytic leukemia (CLL), the highly variable course of disease implicates an underlying mechanism of tumor control by the immune system. In this study, we directly analyzed the landscape of naturally presented CLL antigens and identified targets conveying immune protection. These novel antigens might be valuable both for patient stratification and for inducing therapeutic antitumor immunity. Taken together, we demonstrate that CLL is subject to spontaneous T-cell responses targeting nonmutated antigens, which are associated with improved patient survival and provide novel options for the immunotherapy of CLL. |
Bier, David; Thiel, Philipp; Briels, Jeroen; Ottmann, Christian Stabilization of Protein-Protein Interactions in chemical biology and drug discovery. Prog Biophys Mol Biol, 119 (1), pp. 10-9, 2015, (automatic medline import). @article{BierEtAl2015, title = {Stabilization of Protein-Protein Interactions in chemical biology and drug discovery.}, author = {David Bier and Philipp Thiel and Jeroen Briels and Christian Ottmann}, doi = {https://doi.org/10.1016/j.pbiomolbio.2015.05.002}, year = {2015}, date = {2015-01-01}, journal = {Prog Biophys Mol Biol}, volume = {119}, number = {1}, pages = {10-9}, abstract = {More than 300,000 Protein-Protein Interactions (PPIs) can be found in human cells. This number is significantly larger than the number of single proteins, which are the classical targets for pharmacological intervention. Hence, specific and potent modulation of PPIs by small, drug-like molecules would tremendously enlarge the "druggable genome" enabling novel ways of drug discovery for essentially every human disease. This strategy is especially promising in diseases with difficult targets like intrinsically disordered proteins or transcription factors, for example neurodegeneration or metabolic diseases. Whereas the potential of PPI modulation has been recognized in terms of the development of inhibitors that disrupt or prevent a binary protein complex, the opposite (or complementary) strategy to stabilize PPIs has not yet been realizedin a systematic manner. This fact is rather surprising given the number of impressive natural product examples that confer their activity by stabilizing specific PPIs. In addition, in recent years more and more examples of synthetic molecules are being published that work as PPI stabilizers, despite the fact that in the majority they initially have not been designed as such. 
Here, we describe examples from both the natural products as well as the synthetic molecules advocating for a stronger consideration of the PPI stabilization approach in chemical biology and drug discovery.}, note = {automatic medline import}, keywords = {}, pubstate = {published}, tppubtype = {article} } More than 300,000 Protein-Protein Interactions (PPIs) can be found in human cells. This number is significantly larger than the number of single proteins, which are the classical targets for pharmacological intervention. Hence, specific and potent modulation of PPIs by small, drug-like molecules would tremendously enlarge the "druggable genome" enabling novel ways of drug discovery for essentially every human disease. This strategy is especially promising in diseases with difficult targets like intrinsically disordered proteins or transcription factors, for example neurodegeneration or metabolic diseases. Whereas the potential of PPI modulation has been recognized in terms of the development of inhibitors that disrupt or prevent a binary protein complex, the opposite (or complementary) strategy to stabilize PPIs has not yet been realized in a systematic manner. This fact is rather surprising given the number of impressive natural product examples that confer their activity by stabilizing specific PPIs. In addition, in recent years more and more examples of synthetic molecules are being published that work as PPI stabilizers, despite the fact that in the majority they initially have not been designed as such. Here, we describe examples from both the natural products as well as the synthetic molecules advocating for a stronger consideration of the PPI stabilization approach in chemical biology and drug discovery. |
Nicoludis, John M; Lau, Sze-Yi; Schärfe, Charlotta P.I; Marks, Debora S; Weihofen, Wilhelm A; Gaudet, Rachelle Structure and Sequence Analyses of Clustered Protocadherins Reveal Antiparallel Interactions that Mediate Homophilic Specificity Structure, 23 (11), pp. 2087 - 2098, 2015. @article{Nicoludis20152087, title = {Structure and Sequence Analyses of Clustered Protocadherins Reveal Antiparallel Interactions that Mediate Homophilic Specificity}, author = {John M Nicoludis and Sze-Yi Lau and Charlotta P.I Schärfe and Debora S Marks and Wilhelm A Weihofen and Rachelle Gaudet}, url = {http://www.sciencedirect.com/science/article/pii/S0969212615003755}, year = {2015}, date = {2015-01-01}, journal = {Structure}, volume = {23}, number = {11}, pages = {2087 - 2098}, abstract = {Summary Clustered protocadherin (Pcdh) proteins mediate dendritic self-avoidance in neurons via specific homophilic interactions in their extracellular cadherin (EC) domains. We determined crystal structures of EC1–EC3, containing the homophilic specificity-determining region, of two mouse clustered Pcdh isoforms (PcdhγA1 and PcdhγC3) to investigate the nature of the homophilic interaction. Within the crystal lattices, we observe antiparallel interfaces consistent with a role in trans cell-cell contact. Antiparallel dimerization is supported by evolutionary correlations. Two interfaces, located primarily on EC2-EC3, involve distinctive clustered Pcdh structure and sequence motifs, lack predicted glycosylation sites, and contain residues highly conserved in orthologs but not paralogs, pointing toward their biological significance as homophilic interaction interfaces. These two interfaces are similar yet distinct, reflecting a possible difference in interaction architecture between clustered Pcdh subfamilies. 
These structures initiate a molecular understanding of clustered Pcdh assemblies that are required to produce functional neuronal networks.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Summary Clustered protocadherin (Pcdh) proteins mediate dendritic self-avoidance in neurons via specific homophilic interactions in their extracellular cadherin (EC) domains. We determined crystal structures of EC1–EC3, containing the homophilic specificity-determining region, of two mouse clustered Pcdh isoforms (PcdhγA1 and PcdhγC3) to investigate the nature of the homophilic interaction. Within the crystal lattices, we observe antiparallel interfaces consistent with a role in trans cell-cell contact. Antiparallel dimerization is supported by evolutionary correlations. Two interfaces, located primarily on EC2-EC3, involve distinctive clustered Pcdh structure and sequence motifs, lack predicted glycosylation sites, and contain residues highly conserved in orthologs but not paralogs, pointing toward their biological significance as homophilic interaction interfaces. These two interfaces are similar yet distinct, reflecting a possible difference in interaction architecture between clustered Pcdh subfamilies. These structures initiate a molecular understanding of clustered Pcdh assemblies that are required to produce functional neuronal networks. |
Kenar, Erhan; Franken, Holger; Forcisi, Sara; Wörmann, Kilian; Häring, Hans-Ulrich; Lehmann, Rainer; Schmitt-Kopplin, Philippe; Zell, Andreas; Kohlbacher, Oliver Automated Label-Free Quantification of Metabolites from LC-MS Data Mol. Cell. Prot., 13 (1), pp. 348-59, 2014. @article{FeaFiMetaboMCP13, title = {Automated Label-Free Quantification of Metabolites from LC-MS Data}, author = {Erhan Kenar and Holger Franken and Sara Forcisi and Kilian Wörmann and Hans-Ulrich Häring and Rainer Lehmann and Philippe Schmitt-Kopplin and Andreas Zell and Oliver Kohlbacher}, doi = {https://doi.org/10.1074/mcp.M113.031278}, year = {2014}, date = {2014-01-01}, journal = {Mol. Cell. Prot.}, volume = {13}, number = {1}, pages = {348-59}, abstract = {Liquid chromatography coupled to mass spectrometry (LC-MS) has become a standard technology in metabolomics. In particular, label-free quantification based on LC-MS is easily amenable to large-scale studies and is thus easily amenable to clinical metabolomics. Large-scale studies, however, require automated processing of the large and complex LC-MS datasets. We present a novel algorithm for the detection of mass traces and their aggregation into features (i.e., all signals caused by the same analyte species) that is computationally efficient, sensitive, and leads to reproducible quantification results. The algorithm is based on a sensitive detection of mass traces, which are then assembled into features based on mass-to-charge spacing, co-elution information, and a support vector machine (SVM)-based classifier able to identify potential metabolite isotope patterns. The algorithm is not limited to metabolites but is applicable to a wide range of small molecules (e.g., lipidomics, peptidomics) as well as to other separation technologies (e.g., CE-MS). We assess the algorithm’s robustness to varying noise levels on synthetic data and then validate the approach on experimental data investigating human plasma samples. 
We obtain excellent results in a fully automated data processing pipeline both with respect to accuracy and reproducibility. Compared to state-of-the art algorithms, we can demonstrate increased precision and recall of the method. The algorithm is available as part of the open-source software package OpenMS (www.OpenMS.de) and runs on all major operating systems.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Liquid chromatography coupled to mass spectrometry (LC-MS) has become a standard technology in metabolomics. In particular, label-free quantification based on LC-MS is easily amenable to large-scale studies and is thus easily amenable to clinical metabolomics. Large-scale studies, however, require automated processing of the large and complex LC-MS datasets. We present a novel algorithm for the detection of mass traces and their aggregation into features (i.e., all signals caused by the same analyte species) that is computationally efficient, sensitive, and leads to reproducible quantification results. The algorithm is based on a sensitive detection of mass traces, which are then assembled into features based on mass-to-charge spacing, co-elution information, and a support vector machine (SVM)-based classifier able to identify potential metabolite isotope patterns. The algorithm is not limited to metabolites but is applicable to a wide range of small molecules (e.g., lipidomics, peptidomics) as well as to other separation technologies (e.g., CE-MS). We assess the algorithm’s robustness to varying noise levels on synthetic data and then validate the approach on experimental data investigating human plasma samples. We obtain excellent results in a fully automated data processing pipeline both with respect to accuracy and reproducibility. Compared to state-of-the art algorithms, we can demonstrate increased precision and recall of the method. 
The algorithm is available as part of the open-source software package OpenMS (www.OpenMS.de) and runs on all major operating systems. |
Beck, Florian; Geiger, Jörg; Gambaryan, Stepan; Veit, Johannes; Vaudel, Marc; Nollau, Peter; Kohlbacher, Oliver; Martens, Lennart; Walter, Ulrich; Sickmann, Albert; Zahedi, René P Time-resolved characterization of cAMP/PKA-dependent signaling reveals that platelet inhibition is a concerted process involving multiple signaling pathways Blood, 123 (4), pp. e1-e10, 2014. @article{PlateletsBlood2013, title = {Time-resolved characterization of cAMP/PKA-dependent signaling reveals that platelet inhibition is a concerted process involving multiple signaling pathways}, author = {Florian Beck and Jörg Geiger and Stepan Gambaryan and Johannes Veit and Marc Vaudel and Peter Nollau and Oliver Kohlbacher and Lennart Martens and Ulrich Walter and Albert Sickmann and René P Zahedi}, doi = {https://doi.org/10.1182/blood-2013-07-512384}, year = {2014}, date = {2014-01-01}, journal = {Blood}, volume = {123}, number = {4}, pages = {e1-e10}, abstract = {One of the most important physiological platelet inhibitors is endothelium-derived prostacyclin which stimulates the platelet cAMP/PKA signaling cascade and inhibits virtually all platelet activating key mechanisms. Using quantitative mass spectrometry, we analyzed time resolved phosphorylation patterns in human platelets after treatment with Iloprost, a stable prostacyclin analogue, for 0, 10, 30 and 60 seconds to characterize key mediators of platelet inhibition and activation in three independent biological replicates. We quantified over 2,700 different phosphorylated peptides of which 360 were significantly regulated upon stimulation. 
This comprehensive and time-resolved analysis indicates that platelet inhibition is a multi-pronged process involving different kinases and phosphatases as well as many previously unanticipated proteins and pathways.}, keywords = {}, pubstate = {published}, tppubtype = {article} } One of the most important physiological platelet inhibitors is endothelium-derived prostacyclin which stimulates the platelet cAMP/PKA signaling cascade and inhibits virtually all platelet activating key mechanisms. Using quantitative mass spectrometry, we analyzed time resolved phosphorylation patterns in human platelets after treatment with Iloprost, a stable prostacyclin analogue, for 0, 10, 30 and 60 seconds to characterize key mediators of platelet inhibition and activation in three independent biological replicates. We quantified over 2,700 different phosphorylated peptides of which 360 were significantly regulated upon stimulation. This comprehensive and time-resolved analysis indicates that platelet inhibition is a multi-pronged process involving different kinases and phosphatases as well as many previously unanticipated proteins and pathways. |
Gerasch, Andreas; Kaufmann, Michael; Kohlbacher, Oliver Rebuilding KEGG Maps - An integrative approach for visual analytics of metabolic networks Pacific Visualization Symposium (PacificVis), 2014 IEEE, pp. 97-104, IEEE 2014. @inproceedings{RebuildingKEGGMaps, title = {Rebuilding KEGG Maps - An integrative approach for visual analytics of metabolic networks}, author = {Andreas Gerasch and Michael Kaufmann and Oliver Kohlbacher}, url = {http://dx.doi.org/10.1109/PacificVis.2014.45}, year = {2014}, date = {2014-01-01}, booktitle = {Pacific Visualization Symposium (PacificVis), 2014 IEEE}, pages = {97-104}, organization = {IEEE}, abstract = {Static drawings of biological pathways are still an important research tool for biologists. Gerhard Michal created his seminal drawings of metabolic networks in the 1960s and thus defined canonical representations of some key pathways. The Kyoto Encyclopedia of Genes and Genomes (KEGG) provides the most popular static drawings of biological networks of different types, used in a huge number of publications. These drawings are so widely known that they are immediately recognizable to most biologists. This enables collaborative work and simplifies the communication of analysis results. Automatic layout of these pathway maps is complicated by the fact that the information available from KEGG does not contain the entire layout information of the reference maps. Here we present a fully automated algorithm for interactive KEGG layout construction. The algorithm conserves the original KEGG layout to the extent possible while improving readability by removing unnecessary elements (in organism-specific maps). Multiple pathway maps can be laid out simultaneously to facilitate the navigation of larger networks. 
The algorithm supports the hierarchical layout of sub networks and thus supports interactive exploration of large datasets.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Static drawings of biological pathways are still an important research tool for biologists. Gerhard Michal created his seminal drawings of metabolic networks in the 1960s and thus defined canonical representations of some key pathways. The Kyoto Encyclopedia of Genes and Genomes (KEGG) provides the most popular static drawings of biological networks of different types, used in a huge number of publications. These drawings are so widely known that they are immediately recognizable to most biologists. This enables collaborative work and simplifies the communication of analysis results. Automatic layout of these pathway maps is complicated by the fact that the information available from KEGG does not contain the entire layout information of the reference maps. Here we present a fully automated algorithm for interactive KEGG layout construction. The algorithm conserves the original KEGG layout to the extent possible while improving readability by removing unnecessary elements (in organism-specific maps). Multiple pathway maps can be laid out simultaneously to facilitate the navigation of larger networks. The algorithm supports the hierarchical layout of sub networks and thus supports interactive exploration of large datasets. |
Gerasch, Andreas; Faber, Daniel; Küntzer, Jan; Niermann, Peter; Kohlbacher, Oliver; Lenhof, Hans-Peter; Kaufmann, Michael BiNA: a visual analytics tool for biological network data PLoS ONE, 9 (2), pp. e87397, 2014. @article{BiNA_PLoS_ONE, title = {BiNA: a visual analytics tool for biological network data}, author = {Andreas Gerasch and Daniel Faber and Jan Küntzer and Peter Niermann and Oliver Kohlbacher and Hans-Peter Lenhof and Michael Kaufmann}, doi = {https://dx.doi.org/10.1371%2Fjournal.pone.0087397}, year = {2014}, date = {2014-01-01}, journal = {PLoS ONE}, volume = {9}, number = {2}, pages = {e87397}, abstract = {Interactive visual analysis of biological high-throughput data in the context of the underlying networks is an essential task in modern biomedicine with applications ranging from metabolic engineering to personalized medicine. The complexity and heterogeneity of data sets require flexible software architectures for data analysis. Concise and easily readable graphical representation of data and interactive navigation of large data sets are essential in this context. We present BiNA - the Biological Network Analyzer - a flexible open-source software for analyzing and visualizing biological networks. Highly configurable visualization styles for regulatory and metabolic network data offer sophisticated drawings and intuitive navigation and exploration techniques using hierarchical graph concepts. The generic projection and analysis framework provides powerful functionalities for visual analyses of high-throughput omics data in the context of networks, in particular for the differential analysis and the analysis of time series data. A direct interface to an underlying data warehouse provides fast access to a wide range of semantically integrated biological network databases. A plugin system allows simple customization and integration of new analysis algorithms or visual representations. 
BiNA is available under the 3-clause BSD license at http://bina.unipax.info/.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Interactive visual analysis of biological high-throughput data in the context of the underlying networks is an essential task in modern biomedicine with applications ranging from metabolic engineering to personalized medicine. The complexity and heterogeneity of data sets require flexible software architectures for data analysis. Concise and easily readable graphical representation of data and interactive navigation of large data sets are essential in this context. We present BiNA - the Biological Network Analyzer - a flexible open-source software for analyzing and visualizing biological networks. Highly configurable visualization styles for regulatory and metabolic network data offer sophisticated drawings and intuitive navigation and exploration techniques using hierarchical graph concepts. The generic projection and analysis framework provides powerful functionalities for visual analyses of high-throughput omics data in the context of networks, in particular for the differential analysis and the analysis of time series data. A direct interface to an underlying data warehouse provides fast access to a wide range of semantically integrated biological network databases. A plugin system allows simple customization and integration of new analysis algorithms or visual representations. BiNA is available under the 3-clause BSD license at http://bina.unipax.info/. |
Kohlbacher, Oliver; Schreiber, Falk; Ward, Matthew O Multivariate Networks in the Life Sciences Multivariate Network Visualization 2013, Chapter 4, pp. 61-73, Springer, 2014. @inbook{MultiVariateBio2014, title = {Multivariate Networks in the Life Sciences}, author = {Oliver Kohlbacher and Falk Schreiber and Matthew O Ward}, url = {https://link.springer.com/chapter/10.1007/978-3-319-06793-3_4}, year = {2014}, date = {2014-01-01}, booktitle = {Multivariate Network Visualization 2013}, pages = {61-73}, publisher = {Springer}, chapter = {4}, series = {Lecture Notes in Computer Science (LNCS 8380)}, keywords = {}, pubstate = {published}, tppubtype = {inbook} } |
Roberts, Jonathan C; Yang, Jing; Kohlbacher, Oliver; Ward, Matthew O; Zhou, Michelle X Novel Visual Metaphors for Multivariate Networks Multivariate Network Visualization 2013, Chapter 7, pp. 127-147, Springer, 2014. @inbook{05a28de3994d7a6e4119aa0bfe4dca32, title = {Novel Visual Metaphors for Multivariate Networks}, author = {Jonathan C Roberts and Jing Yang and Oliver Kohlbacher and Matthew O Ward and Michelle X Zhou}, url = {https://link.springer.com/chapter/10.1007/978-3-319-06793-3_7}, year = {2014}, date = {2014-01-01}, booktitle = {Multivariate Network Visualization 2013}, pages = {127-147}, publisher = {Springer}, chapter = {7}, series = {Lecture Notes in Computer Science (LNCS 8380)}, keywords = {}, pubstate = {published}, tppubtype = {inbook} } |
Aebersold, Rudolph; Kohlbacher, Oliver; Vitek, Olga (Ed.) Report from Dagstuhl Seminar 13491 Computational Mass Spectrometry Dagstuhl Publishing, Dagstuhl, Germany, 3 (12), 2014. @proceedings{DagstuhlCompMS2014, title = {Report from Dagstuhl Seminar 13491 Computational Mass Spectrometry}, editor = {Rudolph Aebersold and Oliver Kohlbacher and Olga Vitek}, url = {http://dx.doi.org/10.4230/DagRep.3.12.1}, year = {2014}, date = {2014-01-01}, volume = {3}, number = {12}, publisher = {Dagstuhl Publishing}, address = {Dagstuhl, Germany}, series = {Dagstuhl Reports}, abstract = {The last decade has brought tremendous technological advances in mass spectrometry, which in turn have enabled new applications of mass spectrometry in the life sciences. Proteomics, metabolomics, lipidomics, glycomics and related fields have gotten a massive boost, which also resulted in vastly increased amount of data produced and increased complexity of these data sets. An efficient and accurate analysis of these data sets has become the key bottleneck in the field. The seminar ’Computational Mass Spectrometry’ brought together scientists from mass spectrometry and bioinformatics, from industry and academia to discuss the state of the art in computational mass spectrometry. The participants discussed a number of current topics, for example new and upcoming technologies, the challenges posed by new types of experiments, the challenges of the growing data volume (’big data’), or challenges for education in several working groups. The seminar reviewed the state of the art in computational mass spectrometry and summarized the upcoming challenges. The seminar also led to the creation of structures to support the computational mass spectrometry community (the formation of an ISCB Community of Interest and a HUPO subgroup on computational mass spectrometry). 
This community will also carry on with some of the efforts initiated during the seminar, in particular with the establishment of a computational mass spectrometry curriculum that was drafted in Dagstuhl.}, keywords = {}, pubstate = {published}, tppubtype = {proceedings} } The last decade has brought tremendous technological advances in mass spectrometry, which in turn have enabled new applications of mass spectrometry in the life sciences. Proteomics, metabolomics, lipidomics, glycomics and related fields have gotten a massive boost, which also resulted in vastly increased amount of data produced and increased complexity of these data sets. An efficient and accurate analysis of these data sets has become the key bottleneck in the field. The seminar ’Computational Mass Spectrometry’ brought together scientists from mass spectrometry and bioinformatics, from industry and academia to discuss the state of the art in computational mass spectrometry. The participants discussed a number of current topics, for example new and upcoming technologies, the challenges posed by new types of experiments, the challenges of the growing data volume (’big data’), or challenges for education in several working groups. The seminar reviewed the state of the art in computational mass spectrometry and summarized the upcoming challenges. The seminar also led to the creation of structures to support the computational mass spectrometry community (the formation of an ISCB Community of Interest and a HUPO subgroup on computational mass spectrometry). This community will also carry on with some of the efforts initiated during the seminar, in particular with the establishment of a computational mass spectrometry curriculum that was drafted in Dagstuhl. |
Menzel, Moritz; Meckbach, Diana; Weide, Benjamin; Toussaint, Nora C; Schilbach, Karin; Noor, Seema; Eigentler, Thomas; Ikenberg, Kristian; Busch, Christian; Quintanilla-Martinez, Leticia; Göke, Antonia; Göke, Friederike; Handgretinger, Rupert; Ottmann, Christian; Bastian, Boris; Garbe, Claus; Röcken, Martin; Perner, Sven; Kohlbacher, Oliver; Bauer, Jürgen In melanoma, Hippo signaling is affected by copy number alterations and YAP1 overexpression impairs patient survival Pigment Cell Melanoma Res., 23 (3), pp. E45, 2014. @article{YapPCMR2014, title = {In melanoma, Hippo signaling is affected by copy number alterations and YAP1 overexpression impairs patient survival}, author = {Moritz Menzel and Diana Meckbach and Benjamin Weide and Nora C Toussaint and Karin Schilbach and Seema Noor and Thomas Eigentler and Kristian Ikenberg and Christian Busch and Leticia Quintanilla-Martinez and Antonia Göke and Friederike Göke and Rupert Handgretinger and Christian Ottmann and Boris Bastian and Claus Garbe and Martin Röcken and Sven Perner and Oliver Kohlbacher and Jürgen Bauer}, doi = {https://doi.org/10.1111/pcmr.12249}, year = {2014}, date = {2014-01-01}, journal = {Pigment Cell Melanoma Res.}, volume = {23}, number = {3}, pages = {E45}, abstract = {Melanoma is a highly aggressive neoplasm that metastasizes early during progression. The genetic basis of melanoma invasion and metastasis is only partially understood. Recently, it was shown that Yes-associated protein 1 (YAP1), an oncogenic driver negatively regulated by the Hippo signaling pathway, contributes to melanoma invasion. Here we show focused amplifications of YAP1, the upstream kinase PAK1, and focused deletions of its negative regulators NF2 and LATS1 in 34.5% of melanomas. YAP1 protein is highly expressed in 56% of thick (≥2 mm) primary melanomas and its expression is correlated with YAP1 amplification and tumor thickness. 
Survival analysis of 380 primary melanomas reveals that high YAP1 expression significantly correlates with poor patient survival (p=0.013). In conclusion, these results demonstrate that Hippo signaling is a frequent target of copy number alterations and that YAP1 overexpression negatively affects survival in human melanoma.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Melanoma is a highly aggressive neoplasm that metastasizes early during progression. The genetic basis of melanoma invasion and metastasis is only partially understood. Recently, it was shown that Yes-associated protein 1 (YAP1), an oncogenic driver negatively regulated by the Hippo signaling pathway, contributes to melanoma invasion. Here we show focused amplifications of YAP1, the upstream kinase PAK1, and focused deletions of its negative regulators NF2 and LATS1 in 34.5% of melanomas. YAP1 protein is highly expressed in 56% of thick (≥2 mm) primary melanomas and its expression is correlated with YAP1 amplification and tumor thickness. Survival analysis of 380 primary melanomas reveals that high YAP1 expression significantly correlates with poor patient survival (p=0.013). In conclusion, these results demonstrate that Hippo signaling is a frequent target of copy number alterations and that YAP1 overexpression negatively affects survival in human melanoma. |
Walzer, Mathias; Pernas, Lucia Espona; Nasso, Sara; Bittremieux, Wout; Nahnsen, Sven; Kelchtermans, Pieter; Pichler, Peter; van den Toorn, Henk W P; Staes, An; Vandenbussche, Jonathan; Mazanek, Michael; Taus, Thomas; Scheltema, Richard A; Kelstrup, Christian D; Gatto, Laurent; van Breukelen, Bas; Aiche, Stephan; Valkenborg, Dirk; Laukens, Kris; Lilley, Kathryn S; Olsen, Jesper Velgaard; Heck, Albert J R; Mechtler, Karl; Aebersold, Ruedi; Gevaert, Kris; Vizcaino, Juan Antonio; Hermjakob, Henning; Kohlbacher, Oliver; Martens, Lennart qcML: an exchange format for quality control metrics from mass spectrometry experiments Mol. Cell. Prot., 13 (8), pp. 1905-13, 2014. @article{qcML_MCP_2014, title = {qcML: an exchange format for quality control metrics from mass spectrometry experiments}, author = {Mathias Walzer and Lucia Espona Pernas and Sara Nasso and Wout Bittremieux and Sven Nahnsen and Pieter Kelchtermans and Peter Pichler and Henk W P van den Toorn and An Staes and Jonathan Vandenbussche and Michael Mazanek and Thomas Taus and Richard A Scheltema and Christian D Kelstrup and Laurent Gatto and Bas van Breukelen and Stephan Aiche and Dirk Valkenborg and Kris Laukens and Kathryn S Lilley and Jesper Velgaard Olsen and Albert J R Heck and Karl Mechtler and Ruedi Aebersold and Kris Gevaert and Juan Antonio Vizcaino and Henning Hermjakob and Oliver Kohlbacher and Lennart Martens}, url = {http://www.mcponline.org/content/early/2014/04/23/mcp.M113.035907.abstract}, year = {2014}, date = {2014-01-01}, journal = {Mol. Cell. Prot.}, volume = {13}, number = {8}, pages = {1905-13}, abstract = {Quality control is increasingly recognized as a crucial aspect of mass spectrometry based proteomics. Several recent papers discuss relevant parameters for quality control and present applications to extract these from the instrumental raw data. What has been missing, however, is a standard data exchange format for reporting these performance metrics. 
We therefore developed the qcML format, an XML-based standard that follows the design principles of the related mzML, mzIdentML, mzQuantML and TraML standards from the HUPO-PSI (Proteomics Standards Initiative). In addition to the XML format, we also provide tools for the calculation of a wide range of quality metrics as well as a database format and interconversion tools, so that existing LIMS systems can easily add relational storage of the quality control data to their existing schema. We here describe the qcML specification, along with possible use cases and an illustrative example of the subsequent analysis possibilities. All information about qcML is available at http://code.google.com/p/qcml.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Quality control is increasingly recognized as a crucial aspect of mass spectrometry based proteomics. Several recent papers discuss relevant parameters for quality control and present applications to extract these from the instrumental raw data. What has been missing, however, is a standard data exchange format for reporting these performance metrics. We therefore developed the qcML format, an XML-based standard that follows the design principles of the related mzML, mzIdentML, mzQuantML and TraML standards from the HUPO-PSI (Proteomics Standards Initiative). In addition to the XML format, we also provide tools for the calculation of a wide range of quality metrics as well as a database format and interconversion tools, so that existing LIMS systems can easily add relational storage of the quality control data to their existing schema. We here describe the qcML specification, along with possible use cases and an illustrative example of the subsequent analysis possibilities. All information about qcML is available at http://code.google.com/p/qcml. |
Krüger, Jens; Grunzke, Richard; Gesing, Sandra; Breuers, Sebastian; Brinkmann, André; de la Garza, Luis; Kohlbacher, Oliver; Kruse, Martin; Nagel, Wolfgang; Packschies, Lars; Müller-Pfefferkorn, Ralph; Schärfe, Charlotta; Steinke, Thomas; Schlemmer, Tobias; Warzecha, Klaus; Zink, Andreas; Herres-Pawlis, Sonja The MoSGrid Science Gateway – A Complete Solution for Molecular Simulations J. Chem. Theor. Comput., 10 (6), pp. 2232-2245, 2014. @article{MoSGrid2014, title = {The MoSGrid Science Gateway – A Complete Solution for Molecular Simulations}, author = {Jens Krüger and Richard Grunzke and Sandra Gesing and Sebastian Breuers and André Brinkmann and Luis de la Garza and Oliver Kohlbacher and Martin Kruse and Wolfgang Nagel and Lars Packschies and Ralph Müller-Pfefferkorn and Charlotta Schärfe and Thomas Steinke and Tobias Schlemmer and Klaus Warzecha and Andreas Zink and Sonja Herres-Pawlis}, url = {http://pubs.acs.org/doi/abs/10.1021/ct500159h}, year = {2014}, date = {2014-01-01}, journal = {J. Chem. Theor. Comput.}, volume = {10}, number = {6}, pages = {2232-2245}, abstract = {The MoSGrid portal offers an approach to carry out high-quality molecular simulations on distributed compute infrastructures to scientists with all kinds of background and experience levels. A user-friendly Web interface guarantees the ease-of-use of modern chemical simulation applications well established in the field. The usage of well-defined workflows annotated with metadata largely improves the reproducibility of simulations in the sense of good lab practice. The MoSGrid science gateway supports applications in the domains quantum chemistry (QC), molecular dynamics (MD), and docking. 
This paper presents the open-source MoSGrid architecture as well as lessons learned from its design.}, keywords = {}, pubstate = {published}, tppubtype = {article} } The MoSGrid portal offers an approach to carry out high-quality molecular simulations on distributed compute infrastructures to scientists with all kinds of background and experience levels. A user-friendly Web interface guarantees the ease-of-use of modern chemical simulation applications well established in the field. The usage of well-defined workflows annotated with metadata largely improves the reproducibility of simulations in the sense of good lab practice. The MoSGrid science gateway supports applications in the domains quantum chemistry (QC), molecular dynamics (MD), and docking. This paper presents the open-source MoSGrid architecture as well as lessons learned from its design. |
Wagner, Robert; Li, Jia; Kenar, Erhan; Kohlbacher, Oliver; Machicao, Fausto; Häring, Hans-Ulrich; Fritsche, Andreas; Xu, Guowang; Lehmann, Rainer Clinical and non-targeted metabolomic profiling of homozygous carriers of Transcription Factor 7-like 2 variant rs7903146 Sci. Rep., 4 , pp. 5296, 2014. @article{TCF7L22014, title = {Clinical and non-targeted metabolomic profiling of homozygous carriers of Transcription Factor 7-like 2 variant rs7903146}, author = {Robert Wagner and Jia Li and Erhan Kenar and Oliver Kohlbacher and Fausto Machicao and Hans-Ulrich Häring and Andreas Fritsche and Guowang Xu and Rainer Lehmann}, doi = {https://dx.doi.org/10.1038%2Fsrep05296}, year = {2014}, date = {2014-01-01}, journal = {Sci. Rep.}, volume = {4}, pages = {5296}, abstract = {An important role of the type 2 diabetes risk variant rs7903146 in TCF7L2 in metabolic actions of various tissues, in particular of the liver, has recently been demonstrated by functional animal studies. Accordingly, the TT diabetes risk allele may lead to currently unknown alterations in human. Our study revealed no differences in the kinetics of glucose, insulin, C-peptide and non-esterified fatty acids during an OGTT in homozygous participants from a German diabetes risk cohort (n = 1832) carrying either the rs7903146 CC (n = 15) or the TT (n = 15) genotype. However, beta-cell function was impaired for TT carriers. Covering more than 4000 metabolite ions the plasma metabolome did not reveal any differences between genotypes. Our study argues against a relevant impact of TCF7L2 rs7903146 on the systemic level in humans, but confirms the role in the pathogenesis of type 2 diabetes in humans as a mechanism impairing insulin secretion.}, keywords = {}, pubstate = {published}, tppubtype = {article} } An important role of the type 2 diabetes risk variant rs7903146 in TCF7L2 in metabolic actions of various tissues, in particular of the liver, has recently been demonstrated by functional animal studies. 
Accordingly, the TT diabetes risk allele may lead to currently unknown alterations in human. Our study revealed no differences in the kinetics of glucose, insulin, C-peptide and non-esterified fatty acids during an OGTT in homozygous participants from a German diabetes risk cohort (n = 1832) carrying either the rs7903146 CC (n = 15) or the TT (n = 15) genotype. However, beta-cell function was impaired for TT carriers. Covering more than 4000 metabolite ions the plasma metabolome did not reveal any differences between genotypes. Our study argues against a relevant impact of TCF7L2 rs7903146 on the systemic level in humans, but confirms the role in the pathogenesis of type 2 diabetes in humans as a mechanism impairing insulin secretion. |
Krüger, Jens; Grunzke, Richard; Herres-Pawlis, Sonja; de la Garza, Luis; Kohlbacher, Oliver; Nagel, Wolfgang E; Gesing, Sandra Performance Studies on Distributed Virtual Screening Biomed Res Int, 2014 , pp. 624024, 2014. @article{articlereference.2014-06-18.2262311133, title = {Performance Studies on Distributed Virtual Screening}, author = {Jens Krüger and Richard Grunzke and Sonja Herres-Pawlis and Luis de la Garza and Oliver Kohlbacher and Wolfgang E Nagel and Sandra Gesing}, url = {http://www.hindawi.com/journals/bmri/2014/624024/}, year = {2014}, date = {2014-01-01}, journal = {Biomed Res Int}, volume = {2014}, pages = {624024}, abstract = {Virtual high-throughput screening (vHTS) is an invaluable method in modern drug discovery. It permits screening large datasets or databases of chemical structures for those structures binding possibly to a drug target. Virtual screening is typically performed by docking code, which often runs sequentially. Processing of huge vHTS datasets can be parallelized by chunking the data because individual docking runs are independent of each other. The goal of this work is to find an optimal splitting maximizing the speedup while considering overhead and available cores on Distributed Computing Infrastructures (DCIs). We have conducted thorough performance studies accounting not only for the runtime of the docking itself, but also for structure preparation. Performance studies were conducted via the workflow-enabled science gateway MoSGrid (Molecular Simulation Grid). As input we used benchmark datasets for protein kinases. Our performance studies show that docking workflows can be made to scale almost linearly up to 500 concurrent processes distributed even over large DCIs, thus accelerating vHTS campaigns significantly.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Virtual high-throughput screening (vHTS) is an invaluable method in modern drug discovery. 
It permits screening large datasets or databases of chemical structures for those structures binding possibly to a drug target. Virtual screening is typically performed by docking code, which often runs sequentially. Processing of huge vHTS datasets can be parallelized by chunking the data because individual docking runs are independent of each other. The goal of this work is to find an optimal splitting maximizing the speedup while considering overhead and available cores on Distributed Computing Infrastructures (DCIs). We have conducted thorough performance studies accounting not only for the runtime of the docking itself, but also for structure preparation. Performance studies were conducted via the workflow-enabled science gateway MoSGrid (Molecular Simulation Grid). As input we used benchmark datasets for protein kinases. Our performance studies show that docking workflows can be made to scale almost linearly up to 500 concurrent processes distributed even over large DCIs, thus accelerating vHTS campaigns significantly. |
Griss, Johannes; Jones, Andrew R; Sachsenberg, Timo; Walzer, Mathias; Gatto, Laurent; Hartler, Jürgen; Thallinger, Gerhard G; Salek, Reza M; Steinbeck, Christoph; Neuhauser, Nadin; Cox, Jürgen; Neumann, Steffen; Fan, Jun; Reisinger, Florian; Xu, Qing-Wei; Bandeira, Nuno; Xenarios, Ioannis; Kohlbacher, Oliver; Vizcaino, Juan Antonio; Hermjakob, Henning The mzTab Data Exchange Format: communicating MS-based proteomics and metabolomics experimental results to a wider audience Mol. Cell. Prot., pp. mcp.O113.036681, 2014. @article{mzTab_MCP_2014, title = {The mzTab Data Exchange Format: communicating MS-based proteomics and metabolomics experimental results to a wider audience}, author = {Johannes Griss and Andrew R Jones and Timo Sachsenberg and Mathias Walzer and Laurent Gatto and Jürgen Hartler and Gerhard G Thallinger and Reza M Salek and Christoph Steinbeck and Nadin Neuhauser and Jürgen Cox and Steffen Neumann and Jun Fan and Florian Reisinger and Qing-Wei Xu and Nuno Bandeira and Ioannis Xenarios and Oliver Kohlbacher and Juan Antonio Vizcaino and Henning Hermjakob}, doi = {https://doi.org/10.1074/mcp.O113.036681}, year = {2014}, date = {2014-01-01}, journal = {Mol. Cell. Prot.}, pages = {mcp.O113.036681}, abstract = {The HUPO Proteomics Standards Initiative (PSI) has developed several standardized data formats to facilitate data sharing in mass spectrometry (MS) based proteomics. These allow researchers to report their complete results in a unified way. However, at present, there is no format to describe the final qualitative and quantitative results for proteomics and metabolomics experiments in a simple tabular format. Many downstream analysis use cases are only concerned with the final results of an experiment and require an easily accessible format, compatible with tools like Microsoft Excel or R. We developed the mzTab file format for MS-based proteomics and metabolomics results to meet this need. 
mzTab is intended as a lightweight supplement to the existing standard XML-based file formats (mzML, mzIdentML, mzQuantML), providing a comprehensive summary, similar in concept to the supplementary material of a scientific publication. mzTab files can contain protein, peptide, and small molecule identifications together with experimental metadata and basic quantitative information. The format is not intended to store the complete experimental evidence but provides mechanisms to report results at different levels of detail. This ranges from a simple summary of the final results up to a representation of the results including the experimental design. This format is ideally suited to make MS-based proteomics and metabolomics results available to a wider biological community outside the field of MS. Several software tools for proteomics and metabolomics have already adapted the format as an output format. The comprehensive mzTab specification document and extensive additional documentation can be found at http://mztab.googlecode.com.}, keywords = {}, pubstate = {published}, tppubtype = {article} } The HUPO Proteomics Standards Initiative (PSI) has developed several standardized data formats to facilitate data sharing in mass spectrometry (MS) based proteomics. These allow researchers to report their complete results in a unified way. However, at present, there is no format to describe the final qualitative and quantitative results for proteomics and metabolomics experiments in a simple tabular format. Many downstream analysis use cases are only concerned with the final results of an experiment and require an easily accessible format, compatible with tools like Microsoft Excel or R. We developed the mzTab file format for MS-based proteomics and metabolomics results to meet this need. 
mzTab is intended as a lightweight supplement to the existing standard XML-based file formats (mzML, mzIdentML, mzQuantML), providing a comprehensive summary, similar in concept to the supplementary material of a scientific publication. mzTab files can contain protein, peptide, and small molecule identifications together with experimental metadata and basic quantitative information. The format is not intended to store the complete experimental evidence but provides mechanisms to report results at different levels of detail. This ranges from a simple summary of the final results up to a representation of the results including the experimental design. This format is ideally suited to make MS-based proteomics and metabolomics results available to a wider biological community outside the field of MS. Several software tools for proteomics and metabolomics have already adapted the format as an output format. The comprehensive mzTab specification document and extensive additional documentation can be found at http://mztab.googlecode.com. |
Kramer, Katharina; Sachsenberg, Timo; Beckmann, Benedict M; Qamar, Saadia; Boon, Kum-Loong; Hentze, Matthias W; Kohlbacher, Oliver; Urlaub, Henning Photo-cross-linking and high-resolution mass spectrometry for assignment of RNA-binding sites in RNA-binding proteins Nat. Methods, 11 (10), pp. 1064-70, 2014. @article{RNPxl, title = {Photo-cross-linking and high-resolution mass spectrometry for assignment of RNA-binding sites in RNA-binding proteins}, author = {Katharina Kramer and Timo Sachsenberg and Benedict M Beckmann and Saadia Qamar and Kum-Loong Boon and Matthias W Hentze and Oliver Kohlbacher and Henning Urlaub}, doi = {https://doi.org/10.1038/nmeth.3092}, year = {2014}, date = {2014-01-01}, journal = {Nat. Methods}, volume = {11}, number = {10}, pages = {1064-70}, abstract = {RNA–protein complexes play pivotal roles in many central biological processes. While methods based on next-generation sequencing have advanced our ability to identify the specific RNAs bound by a particular protein, there is a need for precise and systematic ways to identify RNA interaction sites on proteins. We have developed an experimental and computational workflow combining photo-induced cross-linking, high-resolution mass spectrometry, and automated analysis of the resulting mass spectra for the identification of cross-linked peptides, cross-linking site and the cross-linked RNA oligonucleotide moiety of such RNA-binding proteins. The workflow can be applied to any RNA–protein complex of interest or to whole proteomes. We applied the approach to human and yeast mRNA–protein complexes in vitro and in vivo, demonstrating its powerful utility by identifying 257 cross-linking sites on 124 distinct RNA-binding proteins. The open-source software pipeline developed for this purpose, RNPxl, is available as part of the OpenMS project.}, keywords = {}, pubstate = {published}, tppubtype = {article} } RNA–protein complexes play pivotal roles in many central biological processes. 
While methods based on next-generation sequencing have advanced our ability to identify the specific RNAs bound by a particular protein, there is a need for precise and systematic ways to identify RNA interaction sites on proteins. We have developed an experimental and computational workflow combining photo-induced cross-linking, high-resolution mass spectrometry, and automated analysis of the resulting mass spectra for the identification of cross-linked peptides, cross-linking site and the cross-linked RNA oligonucleotide moiety of such RNA-binding proteins. The workflow can be applied to any RNA–protein complex of interest or to whole proteomes. We applied the approach to human and yeast mRNA–protein complexes in vitro and in vivo, demonstrating its powerful utility by identifying 257 cross-linking sites on 124 distinct RNA-binding proteins. The open-source software pipeline developed for this purpose, RNPxl, is available as part of the OpenMS project. |
Szolek, Andras; Schubert, Benjamin; Mohr, Christopher; Sturm, Marc; Feldhahn, Magdalena; Kohlbacher, Oliver OptiType: precision HLA typing from next-generation sequencing data Bioinformatics, 30 (23), pp. 3310-6, 2014. @article{OptiType_Bioinformatics, title = {OptiType: precision HLA typing from next-generation sequencing data}, author = {Andras Szolek and Benjamin Schubert and Christopher Mohr and Marc Sturm and Magdalena Feldhahn and Oliver Kohlbacher}, doi = {https://doi.org/10.1093/bioinformatics/btu548}, year = {2014}, date = {2014-01-01}, journal = {Bioinformatics}, volume = {30}, number = {23}, pages = {3310-6}, abstract = {Motivation: The human leukocyte antigen (HLA) gene cluster plays a crucial role in adaptive immunity and is thus relevant in many biomedical applications. While next-generation sequencing data is often available for a patient, deducing the HLA genotype is difficult due to substantial sequence similarity within the cluster and exceptionally high variability of the loci. Established approaches therefore rely on specific HLA enrichment and sequencing techniques, coming at an additional cost and extra turnaround time. Result: We present OptiType, a novel HLA genotyping algorithm based on integer linear programming, capable of producing accurate predictions from NGS data not specifically enriched for the HLA cluster. We also present a comprehensive benchmark dataset consisting of RNA, exome, and whole genome sequencing data. OptiType significantly outperformed previously published in silico approaches with an overall accuracy of 97% enabling its use in a broad range of applications.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Motivation: The human leukocyte antigen (HLA) gene cluster plays a crucial role in adaptive immunity and is thus relevant in many biomedical applications. 
While next-generation sequencing data is often available for a patient, deducing the HLA genotype is difficult due to substantial sequence similarity within the cluster and exceptionally high variability of the loci. Established approaches therefore rely on specific HLA enrichment and sequencing techniques, coming at an additional cost and extra turnaround time. Result: We present OptiType, a novel HLA genotyping algorithm based on integer linear programming, capable of producing accurate predictions from NGS data not specifically enriched for the HLA cluster. We also present a comprehensive benchmark dataset consisting of RNA, exome, and whole genome sequencing data. OptiType significantly outperformed previously published in silico approaches with an overall accuracy of 97% enabling its use in a broad range of applications. |
Thiel, Philipp; Sach-Peltason, Lisa; Ottmann, Christian; Kohlbacher, Oliver Blocked Inverted Indices for Exact Clustering of Large Chemical Spaces J. Chem. Inf. Model., 54 (9), pp. 2395-401, 2014. @article{ClusteringJCIM, title = {Blocked Inverted Indices for Exact Clustering of Large Chemical Spaces}, author = {Philipp Thiel and Lisa Sach-Peltason and Christian Ottmann and Oliver Kohlbacher}, url = {https://pubs.acs.org/doi/abs/10.1021/ci500150t}, year = {2014}, date = {2014-01-01}, journal = {J. Chem. Inf. Model.}, volume = {54}, number = {9}, pages = {2395-401}, abstract = {The calculation of pairwise compound similarities based on fingerprints is one of the fundamental tasks in chemoinformatics. Methods for efficient calculation of compound similarities are of utmost importance for various applications like similarity searching or library clustering. With the increasing size of public compound databases, exact clustering of these databases is desirable, but often computationally prohibitively expensive. We present an optimized inverted index algorithm for the calculation of all pairwise similarities on 2D fingerprints of a given dataset. In contrast to other algorithms it does neither require GPU computing, nor does it yield a stochastic approximation of the clustering. The algorithm has been designed to work well with multicore architectures and shows excellent parallel speedup. As an application example of this algorithm we implemented a deterministic clustering application, which has been designed to decompose virtual libraries comprising tens of millions of compounds in a short time on current hardware. Our results show, that our implementation achieves more than 400 million Tanimoto similarity calculations per second on a common desktop CPU. 
Deterministic clustering of the available chemical space thus can be done on modern multicore machines within a few days.}, keywords = {}, pubstate = {published}, tppubtype = {article} } The calculation of pairwise compound similarities based on fingerprints is one of the fundamental tasks in chemoinformatics. Methods for efficient calculation of compound similarities are of utmost importance for various applications like similarity searching or library clustering. With the increasing size of public compound databases, exact clustering of these databases is desirable, but often computationally prohibitively expensive. We present an optimized inverted index algorithm for the calculation of all pairwise similarities on 2D fingerprints of a given dataset. In contrast to other algorithms it does neither require GPU computing, nor does it yield a stochastic approximation of the clustering. The algorithm has been designed to work well with multicore architectures and shows excellent parallel speedup. As an application example of this algorithm we implemented a deterministic clustering application, which has been designed to decompose virtual libraries comprising tens of millions of compounds in a short time on current hardware. Our results show, that our implementation achieves more than 400 million Tanimoto similarity calculations per second on a common desktop CPU. Deterministic clustering of the available chemical space thus can be done on modern multicore machines within a few days. |
Jordan, Elena; Roosen-Runge, Felix; Leibfarth, Sara; Zhang, Fajun; Sztucki, Michael; Hildebrandt, Andreas; Kohlbacher, Oliver; Schreiber, Frank Competing Salt Effects on Phase Behavior of Protein Solutions: Tailoring of Protein Interaction by the Binding of Multivalent Ions and Charge Screening J. Phys. Chem. B, 118 (38), pp. 11365-74, 2014. @article{CosaltingJPCB, title = {Competing Salt Effects on Phase Behavior of Protein Solutions: Tailoring of Protein Interaction by the Binding of Multivalent Ions and Charge Screening}, author = {Elena Jordan and Felix Roosen-Runge and Sara Leibfarth and Fajun Zhang and Michael Sztucki and Andreas Hildebrandt and Oliver Kohlbacher and Frank Schreiber}, doi = {https://doi.org/10.1021/jp5058622}, year = {2014}, date = {2014-01-01}, journal = {J. Phys. Chem. B}, volume = {118}, number = {38}, pages = {11365-74}, abstract = {The phase behavior of protein solutions is affected by additives such as crowder molecules or salts. In particular, upon addition of multivalent counterions, a reentrant condensation can occur, i.e. protein solutions are stable for low and high multivalent ion concentrations, but aggregating at intermediate salt concentrations. The addition of monovalent ions shifts the phase boundaries to higher multivalent ion concentrations. This effect is found to be reflected in the protein interactions, as accessed via small-angle X-ray scattering. Two simulation schemes — a Monte Carlo sampling of the counterion binding configurations using the detailed protein structure and an analytical coarse-grained binding model — reproduce the shifts of the experimental phase boundaries. The results support a consistent picture of the protein interactions responsible for the phase behavior. The repulsive Coulomb interaction is varied by the binding of multivalent counterions and additionally screened by any increase of the ionic strength. 
The attractive interaction is induced by the binding of multivalent ions, most likely due to ion bridging between protein molecules. The overall picture of these competing interactions provides interesting insight into possible mechanisms for tailoring interactions in solutions via salt effects.}, keywords = {}, pubstate = {published}, tppubtype = {article} } The phase behavior of protein solutions is affected by additives such as crowder molecules or salts. In particular, upon addition of multivalent counterions, a reentrant condensation can occur, i.e. protein solutions are stable for low and high multivalent ion concentrations, but aggregating at intermediate salt concentrations. The addition of monovalent ions shifts the phase boundaries to higher multivalent ion concentrations. This effect is found to be reflected in the protein interactions, as accessed via small-angle X-ray scattering. Two simulation schemes — a Monte Carlo sampling of the counterion binding configurations using the detailed protein structure and an analytical coarse-grained binding model — reproduce the shifts of the experimental phase boundaries. The results support a consistent picture of the protein interactions responsible for the phase behavior. The repulsive Coulomb interaction is varied by the binding of multivalent counterions and additionally screened by any increase of the ionic strength. The attractive interaction is induced by the binding of multivalent ions, most likely due to ion bridging between protein molecules. The overall picture of these competing interactions provides interesting insight into possible mechanisms for tailoring interactions in solutions via salt effects. |
Hopf, Thomas A; Schärfe, Charlotta P I; Rodrigues, João P G L M; Green, Anna G; Kohlbacher, Oliver; Sander, Chris; Bonvin, Alexandre M J J; Marks, Debora S Sequence co-evolution gives 3D contacts and structures of protein complexes eLife, pp. 10.7554/eLife.03430, 2014. @article{ComplexECs_eLife, title = {Sequence co-evolution gives 3D contacts and structures of protein complexes}, author = {Thomas A Hopf and Charlotta P I Schärfe and João P G L M Rodrigues and Anna G Green and Oliver Kohlbacher and Chris Sander and Alexandre M J J Bonvin and Debora S Marks}, doi = {https://doi.org/10.7554/eLife.03430}, year = {2014}, date = {2014-01-01}, journal = {eLife}, pages = {10.7554/eLife.03430}, abstract = {Protein-protein interactions are fundamental to many biological processes. Experimental screens have identified tens of thousands of interactions and structural biology has provided detailed functional insight for select 3D protein complexes. An alternative rich source of information about protein interactions is the evolutionary sequence record. Building on earlier work, we show that analysis of correlated evolutionary sequence changes across proteins identifies residues that are close in space with sufficient accuracy to determine the three-dimensional structure of the protein complexes. We evaluate prediction performance in blinded tests on 76 complexes of known 3D structure, predict protein-protein contacts in 32 complexes of unknown structure, and demonstrate how evolutionary couplings can be used to distinguish between interacting and non-interacting protein pairs in a large complex. With the current growth of sequences, we expect that the method can be generalized to genome-wide elucidation of protein-protein interaction networks and used for interaction predictions at residue resolution.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Protein-protein interactions are fundamental to many biological processes. 
Experimental screens have identified tens of thousands of interactions and structural biology has provided detailed functional insight for select 3D protein complexes. An alternative rich source of information about protein interactions is the evolutionary sequence record. Building on earlier work, we show that analysis of correlated evolutionary sequence changes across proteins identifies residues that are close in space with sufficient accuracy to determine the three-dimensional structure of the protein complexes. We evaluate prediction performance in blinded tests on 76 complexes of known 3D structure, predict protein-protein contacts in 32 complexes of unknown structure, and demonstrate how evolutionary couplings can be used to distinguish between interacting and non-interacting protein pairs in a large complex. With the current growth of sequences, we expect that the method can be generalized to genome-wide elucidation of protein-protein interaction networks and used for interaction predictions at residue resolution. |
Avbelj, Monika; Wolz, Olaf-Oliver; Fekonja, Ota; Bencina, Mojca; Repič, Matej; Mavri, Janez; Krüger, Jens; Schärfe, Charlotta; Delmiro-Garcia, Magno; Panter, Gabriela; Kohlbacher, Oliver; Weber, Alexander N R; Jerala, Roman Activation of lymphoma-associated MyD88 mutations via allostery-induced TIR domain oligomerization Blood, 124 (26), pp. 3896-904, 2014. @article{myD88_Blood_2014, title = {Activation of lymphoma-associated MyD88 mutations via allostery-induced TIR domain oligomerization}, author = {Monika Avbelj and Olaf-Oliver Wolz and Ota Fekonja and Mojca Bencina and Matej Repič and Janez Mavri and Jens Krüger and Charlotta Schärfe and Magno Delmiro-Garcia and Gabriela Panter and Oliver Kohlbacher and Alexander N R Weber and Roman Jerala}, doi = {https://doi.org/10.1182/blood-2014-05-573188}, year = {2014}, date = {2014-01-01}, journal = {Blood}, volume = {124}, number = {26}, pages = {3896-904}, abstract = {MyD88 is the key signaling adapter of Toll-like and IL-1 receptors. Recurrent lymphoma-associated mutations, particularly Leu265Pro (L265P), within the MyD88 TIR domain sustain lymphoma cell survival due to constitutive NF-κB signaling. We found that mutated TIR domains displayed an intrinsic propensity for augmented oligomerization and spontaneous formation of cytosolic Myddosome aggregates in lymphoma cell lines, mimicking the effect of dimerized TIR domain. Blocking of MyD88 oligomerization induced apoptosis. The L265P TIR domain can recruit the endogenous WT MyD88 for oligomer formation and hyperactivity. Molecular dynamics simulations and analysis of additional mutations suggest that constitutive activity is caused by allosteric oligomerization.}, keywords = {}, pubstate = {published}, tppubtype = {article} } MyD88 is the key signaling adapter of Toll-like and IL-1 receptors. Recurrent lymphoma-associated mutations, particularly Leu265Pro (L265P), within the MyD88 TIR domain sustain lymphoma cell survival due to constitutive NF-κB signaling. 
We found that mutated TIR domains displayed an intrinsic propensity for augmented oligomerization and spontaneous formation of cytosolic Myddosome aggregates in lymphoma cell lines, mimicking the effect of dimerized TIR domain. Blocking of MyD88 oligomerization induced apoptosis. The L265P TIR domain can recruit the endogenous WT MyD88 for oligomer formation and hyperactivity. Molecular dynamics simulations and analysis of additional mutations suggest that constitutive activity is caused by allosteric oligomerization. |
Römer, Michael; Backert, Linus; Eichner, Johannes; Zell, Andreas ToxDBScan: Large-Scale Similarity Screening of Toxicological Databases for Drug Candidates International Journal of Molecular Sciences, 15 (10), pp. 19037–19055, 2014. @article{cite-kez, title = {ToxDBScan: Large-Scale Similarity Screening of Toxicological Databases for Drug Candidates}, author = {Michael Römer and Linus Backert and Johannes Eichner and Andreas Zell}, url = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC4227259/}, year = {2014}, date = {2014-01-01}, journal = {International Journal of Molecular Sciences}, volume = {15}, number = {10}, pages = {19037--19055}, abstract = {We present a new tool for hepatocarcinogenicity evaluation of drug candidates in rodents. ToxDBScan is a web tool offering quick and easy similarity screening of new drug candidates against two large-scale public databases, which contain expression profiles for substances with known carcinogenic profiles: TG-GATEs and DrugMatrix. ToxDBScan uses a set similarity score that computes the putative similarity based on similar expression of genes to identify chemicals with similar genotoxic and hepatocarcinogenic potential. We propose using a discretized representation of expression profiles, which use only information on up- or down-regulation of genes as relevant features. Therefore, only the deregulated genes are required as input. ToxDBScan provides an extensive report on similar compounds, which includes additional information on compounds, differential genes and pathway enrichments. We evaluated ToxDBScan with expression data from 15 chemicals with known hepatocarcinogenic potential and observed a sensitivity of 88%. Based on the identified chemicals, we achieved perfect classification of the independent test set. 
ToxDBScan is publicly available from the ZBIT Bioinformatics Toolbox.}, keywords = {}, pubstate = {published}, tppubtype = {article} } We present a new tool for hepatocarcinogenicity evaluation of drug candidates in rodents. ToxDBScan is a web tool offering quick and easy similarity screening of new drug candidates against two large-scale public databases, which contain expression profiles for substances with known carcinogenic profiles: TG-GATEs and DrugMatrix. ToxDBScan uses a set similarity score that computes the putative similarity based on similar expression of genes to identify chemicals with similar genotoxic and hepatocarcinogenic potential. We propose using a discretized representation of expression profiles, which use only information on up- or down-regulation of genes as relevant features. Therefore, only the deregulated genes are required as input. ToxDBScan provides an extensive report on similar compounds, which includes additional information on compounds, differential genes and pathway enrichments. We evaluated ToxDBScan with expression data from 15 chemicals with known hepatocarcinogenic potential and observed a sensitivity of 88%. Based on the identified chemicals, we achieved perfect classification of the independent test set. ToxDBScan is publicly available from the ZBIT Bioinformatics Toolbox. |
Nahnsen, Sven; Bielow, Chris; Reinert, Knut; Kohlbacher, Oliver Tools for label-free peptide quantification Mol. Cell. Prot., 12 (3), pp. 549-56, 2013. @article{LabelFreeReviewMCP2013, title = {Tools for label-free peptide quantification}, author = {Sven Nahnsen and Chris Bielow and Knut Reinert and Oliver Kohlbacher}, doi = {https://doi.org/10.1074/mcp.R112.025163}, year = {2013}, date = {2013-01-01}, journal = {Mol. Cell. Prot.}, volume = {12}, number = {3}, pages = {549-56}, abstract = {The increasing scale and complexity of quantitative proteomics studies complicates the subsequent analysis of the acquired data. Untargeted label-free quantification, either based on feature intensities or on spectral counting, is a method that scales particularly well with respect to the number of samples. It is thus an excellent alternative to labeling techniques. In order to profit from this scalability, however, data analysis has to cope with large amounts of data, process them automatically, and do a thorough statistical analysis in order to achieve reliable results. We review the state of the art with respect to computational tools for label-free quantification in untargeted proteomics. The two fundamental approaches are feature-based quantification, relying on the summed-up mass spectrometric intensity of peptides, and spectral counting, which relies on the number of MS/MS spectra acquired for a certain protein. We review the current algorithmic approaches underlying some widely used software packages and briefly discuss the statistical strategies required to analyze the data.}, keywords = {}, pubstate = {published}, tppubtype = {article} } The increasing scale and complexity of quantitative proteomics studies complicates the subsequent analysis of the acquired data. Untargeted label-free quantification, either based on feature intensities or on spectral counting, is a method that scales particularly well with respect to the number of samples. 
It is thus an excellent alternative to labeling techniques. In order to profit from this scalability, however, data analysis has to cope with large amounts of data, process them automatically, and do a thorough statistical analysis in order to achieve reliable results. We review the state of the art with respect to computational tools for label-free quantification in untargeted proteomics. The two fundamental approaches are feature-based quantification, relying on the summed-up mass spectrometric intensity of peptides, and spectral counting, which relies on the number of MS/MS spectra acquired for a certain protein. We review the current algorithmic approaches underlying some widely used software packages and briefly discuss the statistical strategies required to analyze the data. |
Nahnsen, Sven; Sachsenberg, Timo; Kohlbacher, Oliver PTMeta: Increasing Identification Rates of Modified Peptides Using Modification Pre-scanning and Meta-analysis Proteomics, 13 (6), pp. 1042-51, 2013. @article{PTMeta, title = {PTMeta: Increasing Identification Rates of Modified Peptides Using Modification Pre-scanning and Meta-analysis}, author = {Sven Nahnsen and Timo Sachsenberg and Oliver Kohlbacher}, doi = {https://doi.org/10.1002/pmic.201200315}, year = {2013}, date = {2013-01-01}, journal = {Proteomics}, volume = {13}, number = {6}, pages = {1042-51}, abstract = {The analysis of peptides and proteins in complex biological systems has greatly improved over the last decade. State-of-the-art mass spectrometric instruments combined with adequate software tools allow for more and more comprehensive proteome analyses. Most proteome-wide studies focus on the analysis of unmodified proteins or look at selected modifications only. However, spectral information of protein modifications, chemically induced through sample preparation or post-translationally attached in biological pathways is acquired as a significant, yet disregarded, fraction of tandem spectra in most discovery studies. We present a new computational pipeline, PTMeta, to uncover information of modifications attached to peptides. We use modification pre-scanning to pinpoint the most abundant potential modifications, followed by extensive database search and a statistical framework to combine results from database search runs with different modification settings.}, keywords = {}, pubstate = {published}, tppubtype = {article} } The analysis of peptides and proteins in complex biological systems has greatly improved over the last decade. State-of-the-art mass spectrometric instruments combined with adequate software tools allow for more and more comprehensive proteome analyses. Most proteome-wide studies focus on the analysis of unmodified proteins or look at selected modifications only. 
However, spectral information of protein modifications, chemically induced through sample preparation or post-translationally attached in biological pathways is acquired as a significant, yet disregarded, fraction of tandem spectra in most discovery studies. We present a new computational pipeline, PTMeta, to uncover information of modifications attached to peptides. We use modification pre-scanning to pinpoint the most abundant potential modifications, followed by extensive database search and a statistical framework to combine results from database search runs with different modification settings. |
Perez-Riverol, Yasset; Hermjakob, Henning; Kohlbacher, Oliver; Martens, Lennart; Creasy, David; Cox, Jürgen; Leprevost, Felipe; Shan, Baozhen Paul; Cabrera, Gleysin; Guillen, Gerardo; Padron, Gabriel; Gonzalez, Luis Xavier; Besada, Vladimir Computational Proteomics Pitfalls and Challenges: HavanaBioinfo 2012 Workshop Report J. Proteomics, 87 , pp. 134-8, 2013. @article{HavanaWorkshop2012, title = {Computational Proteomics Pitfalls and Challenges: HavanaBioinfo 2012 Workshop Report}, author = {Yasset Perez-Riverol and Henning Hermjakob and Oliver Kohlbacher and Lennart Martens and David Creasy and Jürgen Cox and Felipe Leprevost and Baozhen Paul Shan and Gleysin Cabrera and Gerardo Guillen and Gabriel Padron and Luis Xavier Gonzalez and Vladimir Besada}, doi = {https://doi.org/10.1016/j.jprot.2013.01.019}, year = {2013}, date = {2013-01-01}, journal = {J. Proteomics}, volume = {87}, pages = {134-8}, abstract = {The workshop "Bioinformatics for Biotechnology Applications (HavanaBioinfo 2012)", held December 8-11 2012 in Havana, aimed at exploring new bioinformatics tools and approaches for large-scale proteomics, genomics and chemoinformatics. 
Major conclusions of the workshop include the following: (i) Development of new applications and bioinformatics tools for proteomic repositories analysis is crucial; current proteomic repositories contain enough data (spectra/identifications) that can be used to increase the annotations in protein databases and to generate new tools for protein identification; (ii) spectral libraries, de novo sequencing and database search tools should be combined to increase the number of protein identifications; (iii) Protein probabilities and FDR are not yet sufficiently mature; (iv) computational proteomics software needs to become more intuitive; and at the same time appropriate education and training should be provided to help efficient exchange of knowledge between mass spectrometrist and experimental biologists with bioinformaticians in order to increase their bioinformatics background, especially statistics knowledge.}, keywords = {}, pubstate = {published}, tppubtype = {article} } The workshop "Bioinformatics for Biotechnology Applications (HavanaBioinfo 2012)", held December 8-11 2012 in Havana, aimed at exploring new bioinformatics tools and approaches for large-scale proteomics, genomics and chemoinformatics. 
Major conclusions of the workshop include the following: (i) Development of new applications and bioinformatics tools for proteomic repositories analysis is crucial; current proteomic repositories contain enough data (spectra/identifications) that can be used to increase the annotations in protein databases and to generate new tools for protein identification; (ii) spectral libraries, de novo sequencing and database search tools should be combined to increase the number of protein identifications; (iii) Protein probabilities and FDR are not yet sufficiently mature; (iv) computational proteomics software needs to become more intuitive; and at the same time appropriate education and training should be provided to help efficient exchange of knowledge between mass spectrometrist and experimental biologists with bioinformaticians in order to increase their bioinformatics background, especially statistics knowledge. |
Weisser, Hendrik; Nahnsen, Sven; Grossmann, Jonas; Nilse, Lars; Quandt, Andreas; Brauer, Hendrik; Sturm, Marc; Kenar, Erhan; Kohlbacher, Oliver; Aebersold, Ruedi; Malmström, Lars An automated pipeline for high-throughput label-free quantitative proteomics J. Proteome Res., 12 (4), pp. 1628-1644, 2013. @article{LFQPipeJPR, title = {An automated pipeline for high-throughput label-free quantitative proteomics}, author = {Hendrik Weisser and Sven Nahnsen and Jonas Grossmann and Lars Nilse and Andreas Quandt and Hendrik Brauer and Marc Sturm and Erhan Kenar and Oliver Kohlbacher and Ruedi Aebersold and Lars Malmström}, doi = {https://doi.org/10.1021/pr300992u}, year = {2013}, date = {2013-01-01}, journal = {J. Proteome Res.}, volume = {12}, number = {4}, pages = {1628-1644}, abstract = {We present a computational pipeline for the quantification of peptides and proteins in label-free LC-MS/MS datasets. The pipeline is composed of tools from the OpenMS software framework and is applicable to the processing of large experiments (50+ samples). We describe several enhancements that we have introduced to OpenMS to realize the implementation of this pipeline. They include new algorithms for centroiding of raw data, for feature detection, for the alignment of multiple related measurements, and a new tool for the calculation of peptide and protein abundances. Where possible, we compare the performance of the new algorithms to that of their established counterparts in OpenMS. We validate the pipeline based on two small datasets that provide ground truths for the quantification. There, we also compare our results to those of MaxQuant and Progenesis LC-MS -- two popular alternatives for the analysis of label-free data. 
We then show how our software can be applied to a large heterogeneous dataset of 58 LC-MS/MS runs.}, keywords = {}, pubstate = {published}, tppubtype = {article} } We present a computational pipeline for the quantification of peptides and proteins in label-free LC-MS/MS datasets. The pipeline is composed of tools from the OpenMS software framework and is applicable to the processing of large experiments (50+ samples). We describe several enhancements that we have introduced to OpenMS to realize the implementation of this pipeline. They include new algorithms for centroiding of raw data, for feature detection, for the alignment of multiple related measurements, and a new tool for the calculation of peptide and protein abundances. Where possible, we compare the performance of the new algorithms to that of their established counterparts in OpenMS. We validate the pipeline based on two small datasets that provide ground truths for the quantification. There, we also compare our results to those of MaxQuant and Progenesis LC-MS -- two popular alternatives for the analysis of label-free data. We then show how our software can be applied to a large heterogeneous dataset of 58 LC-MS/MS runs. |
Brand, Luise; Fischer, Nina; Harter, Klaus; Kohlbacher, Oliver; Wanke, Dierk Elucidating the evolutionary conserved DNA-binding specificities of WRKY transcription factors by molecular dynamics and in vitro binding assays Nucl. Acids Res., 41 (21), pp. 9764-78, 2013. @article{NAR-WRKY-2013, title = {Elucidating the evolutionary conserved DNA-binding specificities of WRKY transcription factors by molecular dynamics and in vitro binding assays}, author = {Luise Brand and Nina Fischer and Klaus Harter and Oliver Kohlbacher and Dierk Wanke}, doi = {https://doi.org/10.1093/nar/gkt732}, year = {2013}, date = {2013-01-01}, journal = {Nucl. Acids Res.}, volume = {41}, number = {21}, pages = {9764-78}, abstract = {WRKY transcription factors constitute a large protein family in plants that is involved in the regulation of developmental processes and responses to biotic or abiotic stimuli. The question arises how stimulus-specific responses are mediated given that the highly conserved WRKY DNA-binding domain (DBD) exclusively recognizes the 'TTGACY' W-box consensus. We speculated that the W-box consensus might be more degenerate and yet undetected differences in the W-box consensus of WRKYs of different evolutionary descent exist. The phylogenetic analysis of WRKY DBDs suggests that they evolved from an ancestral group IIc-like WRKY early in the eukaryote lineage. A direct descent of group IIc WRKYs supports a monophyletic origin of all other group II and III WRKYs from group I by loss of an N-terminal DBD. Group I WRKYs are of paraphyletic descent and evolved multiple times independently. By homology modeling, molecular dynamics simulations and in vitro DNA-protein interaction-enzyme-linked immunosorbent assay with AtWRKY50 (IIc), AtWRKY33 (I) and AtWRKY11 (IId) DBDs, we revealed differences in DNA-binding specificities. 
Our data imply that other components are essentially required besides the W-box-specific binding to DNA to facilitate a stimulus-specific WRKY function.}, keywords = {}, pubstate = {published}, tppubtype = {article} } WRKY transcription factors constitute a large protein family in plants that is involved in the regulation of developmental processes and responses to biotic or abiotic stimuli. The question arises how stimulus-specific responses are mediated given that the highly conserved WRKY DNA-binding domain (DBD) exclusively recognizes the 'TTGACY' W-box consensus. We speculated that the W-box consensus might be more degenerate and yet undetected differences in the W-box consensus of WRKYs of different evolutionary descent exist. The phylogenetic analysis of WRKY DBDs suggests that they evolved from an ancestral group IIc-like WRKY early in the eukaryote lineage. A direct descent of group IIc WRKYs supports a monophyletic origin of all other group II and III WRKYs from group I by loss of an N-terminal DBD. Group I WRKYs are of paraphyletic descent and evolved multiple times independently. By homology modeling, molecular dynamics simulations and in vitro DNA-protein interaction-enzyme-linked immunosorbent assay with AtWRKY50 (IIc), AtWRKY33 (I) and AtWRKY11 (IId) DBDs, we revealed differences in DNA-binding specificities. Our data imply that other components are essentially required besides the W-box-specific binding to DNA to facilitate a stimulus-specific WRKY function. |
Wanke, Dierk; Brand, Luise H; Fischer, Nina M; Peschke, Florian; Kilian, Joachim; Berendzen, K W Implications of DNA-nanostructures by Hoogsteen-dinucleotides on transcription factor binding Quantum Bio-Informatics V: From Quantum Information to Bio-Informatics, 30 , Chapter 30, pp. 351-362, World Scientific, 2013, ISBN: 978-981-4460-01-9, (ISBN: 978-981-4460-01-9). @inbook{Wanke_Hoogsteen_2013, title = {Implications of DNA-nanostructures by Hoogsteen-dinucleotides on transcription factor binding}, author = {Dierk Wanke and Luise H Brand and Nina M Fischer and Florian Peschke and Joachim Kilian and K W Berendzen}, url = {http://www.worldscientific.com/worldscibooks/10.1142/8750}, isbn = {978-981-4460-01-9}, year = {2013}, date = {2013-01-01}, booktitle = {Quantum Bio-Informatics V: From Quantum Information to Bio-Informatics}, volume = {30}, pages = {351-362}, publisher = {World Scientific}, chapter = {30}, series = {QP-PQ: Quantum Probability and White Noise Analysis}, note = {ISBN: 978-981-4460-01-9}, keywords = {}, pubstate = {published}, tppubtype = {inbook} } |
Sturm, T; Leinders-Zufall, T; Maček, Boris; Walzer, Mathias; Jung, S; Pömmerl, B; Stevanović, Stefan; Zufall, F; Overath, P; Rammensee, Hans-Georg Mouse urinary peptides provide a molecular basis for genotype discrimination by nasal sensory neurons. Nat. Commun., 4 , pp. 1616, 2013. @article{MousePeptidesNatCommun2013, title = {Mouse urinary peptides provide a molecular basis for genotype discrimination by nasal sensory neurons.}, author = {T Sturm and T Leinders-Zufall and Boris Maček and Mathias Walzer and S Jung and B Pömmerl and Stefan Stevanović and F Zufall and P Overath and Hans-Georg Rammensee}, doi = {https://doi.org/10.1038/ncomms2610}, year = {2013}, date = {2013-01-01}, journal = {Nat. Commun.}, volume = {4}, pages = {1616}, abstract = {Selected groups of peptides, including those that are presented by major histocompatibility complex (MHC) proteins, have been proposed to transmit information to the olfactory system of vertebrates via their ability to stimulate chemosensory neurons. However, the lack of knowledge about such peptides in natural sources accessible for nasal recognition has been a major barrier for this hypothesis. Here we analyse urinary peptides from selected mouse strains with respect to genotype-related individual differences. We discover many abundant peptides with single amino-acid variations corresponding to genomic differences. The polymorphism of major urinary proteins is reflected by variations in prominent urinary peptides. We also demonstrate an MHC-dependent peptide (SIINFEKL) occurring at very low concentrations in mouse urine. Chemoreceptive neurons in the vomeronasal organ detect and discriminate single amino-acid variation peptides as well as SIINFEKL. 
Hence, urinary peptides represent a real-time sampling of the expressed genome available for chemosensory assessment by other individuals.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Selected groups of peptides, including those that are presented by major histocompatibility complex (MHC) proteins, have been proposed to transmit information to the olfactory system of vertebrates via their ability to stimulate chemosensory neurons. However, the lack of knowledge about such peptides in natural sources accessible for nasal recognition has been a major barrier for this hypothesis. Here we analyse urinary peptides from selected mouse strains with respect to genotype-related individual differences. We discover many abundant peptides with single amino-acid variations corresponding to genomic differences. The polymorphism of major urinary proteins is reflected by variations in prominent urinary peptides. We also demonstrate an MHC-dependent peptide (SIINFEKL) occurring at very low concentrations in mouse urine. Chemoreceptive neurons in the vomeronasal organ detect and discriminate single amino-acid variation peptides as well as SIINFEKL. Hence, urinary peptides represent a real-time sampling of the expressed genome available for chemosensory assessment by other individuals. |
Roosen-Runge, Felix; Heck, Benjamin; Zhang, Fajun; Kohlbacher, Oliver; Schreiber, Frank Interplay of pH and Binding of Multivalent Metal Ions: Charge Inversion and Reentrant Condensation in Protein Solutions J. Phys. Chem. B, 117 (18), pp. 5777-87, 2013. @article{FeAlpHReentrant, title = {Interplay of pH and Binding of Multivalent Metal Ions: Charge Inversion and Reentrant Condensation in Protein Solutions}, author = {Felix Roosen-Runge and Benjamin Heck and Fajun Zhang and Oliver Kohlbacher and Frank Schreiber}, url = {https://pubs.acs.org/doi/abs/10.1021/jp401874t}, year = {2013}, date = {2013-01-01}, journal = {J. Phys. Chem. B}, volume = {117}, number = {18}, pages = {5777-87}, abstract = {Tuning of protein surface charge is a fundamental mechanism in biological systems. Protein charge is regulated in a physiological context by pH and interaction with counterions. We report on charge inversion and the related reentrant condensation in solutions of globular proteins with different multivalent metal cations. In particular, we focus on the changes in phase behavior and charge regulation due to pH effects caused by hydrolysis of metal ions. For several proteins and metal salts, charge inversion as measured by electrophoretic light scattering is found to be a universal phenomenon, the extent of which is dependent on the specific protein-salt combination. Reentrant phase diagrams show a much narrower phase-separated regime for acidic salts such as AlCl3 and FeCl3 compared to neutral salts such as YCl3 or LaCl3. The differences between acidic and neutral salts can be explained by the interplay of pH effects and binding of the multivalent counterions. The experimental findings are reproduced with good agreement by an analytical model for protein charging taking into account ion condensation, metal ion hydrolysis and interaction with charged amino acid side chains on the protein surface. 
Finally, the relationship of charge inversion and reentrant condensation is discussed, suggesting that pH variation in combination with multivalent cations provides control over both attractive and repulsive interactions between proteins. Keywords: charge regulation, phase behavior, ion-protein interaction, metal hydrolysis, zeta potential, electrophoretic light scattering.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Tuning of protein surface charge is a fundamental mechanism in biological systems. Protein charge is regulated in a physiological context by pH and interaction with counterions. We report on charge inversion and the related reentrant condensation in solutions of globular proteins with different multivalent metal cations. In particular, we focus on the changes in phase behavior and charge regulation due to pH effects caused by hydrolysis of metal ions. For several proteins and metal salts, charge inversion as measured by electrophoretic light scattering is found to be a universal phenomenon, the extent of which is dependent on the specific protein-salt combination. Reentrant phase diagrams show a much narrower phase-separated regime for acidic salts such as AlCl3 and FeCl3 compared to neutral salts such as YCl3 or LaCl3. The differences between acidic and neutral salts can be explained by the interplay of pH effects and binding of the multivalent counterions. The experimental findings are reproduced with good agreement by an analytical model for protein charging taking into account ion condensation, metal ion hydrolysis and interaction with charged amino acid side chains on the protein surface. Finally, the relationship of charge inversion and reentrant condensation is discussed, suggesting that pH variation in combination with multivalent cations provides control over both attractive and repulsive interactions between proteins. 
Keywords: charge regulation, phase behavior, ion-protein interaction, metal hydrolysis, zeta potential, electrophoretic light scattering. |
Walzer, Mathias; Qi, Da; Mayer, Gerhard; Uszkoreit, Julian; Eisenacher, Martin; Sachsenberg, Timo; Gonzalez-Galarza, Faviel F; Fan, Jun; Bessant, Conrad; Deutsch, Eric W; Reisinger, Florian; Vizcaíno, Juan Antonio; Medina-Aunon, Alberto J; Albar, Juan Pablo; Kohlbacher, Oliver; Jones, Andrew R The mzQuantML data standard for mass spectrometry-based quantitative studies in proteomics Mol. Cell. Prot., 12 (8), pp. 2332-40, 2013. @article{mzQuantMCP, title = {The mzQuantML data standard for mass spectrometry-based quantitative studies in proteomics}, author = {Mathias Walzer and Da Qi and Gerhard Mayer and Julian Uszkoreit and Martin Eisenacher and Timo Sachsenberg and Faviel F Gonzalez-Galarza and Jun Fan and Conrad Bessant and Eric W Deutsch and Florian Reisinger and Juan Antonio Vizcaíno and Alberto J Medina-Aunon and Juan Pablo Albar and Oliver Kohlbacher and Andrew R Jones}, doi = {https://doi.org/10.1074/mcp.O113.028506}, year = {2013}, date = {2013-01-01}, journal = {Mol. Cell. Prot.}, volume = {12}, number = {8}, pages = {2332-40}, abstract = {The range of heterogeneous approaches available for quantifying protein abundance by mass spectrometry (MS) leads to considerable challenges in modelling, archiving, exchanging or submitting experimental data sets as supplemental material to journals. To date, there has been no widely accepted format for capturing the evidence trail of how quantitative analysis has been performed by software, for transferring data between software packages or submitting to public databases. In the context of the Proteomics Standards Initiative, we have developed the mzQuantML data standard. The standard can represent quantitative data about regions in two-dimensional retention time versus mass/charge space (called features), peptides, proteins and protein groups - where there is ambiguity in peptide to protein inference, plus limited support for small molecule (metabolomic) data. 
The format has structures for representing replicate MS runs, grouping of replicates, for example as study variables, and for capturing the parameters used by software packages to arrive at these values. The format has capabilities to reference other standards such as mzML and mzIdentML and thus the evidence trail for the MS workflow as a whole can now be described. Several software implementations are available and we encourage other bioinformatics groups to use mzQuantML as an input, internal or output format for quantitative software and for structuring local repositories. All project resources are available in the public domain from http://www.psidev.info/mzquantml.}, keywords = {}, pubstate = {published}, tppubtype = {article} } The range of heterogeneous approaches available for quantifying protein abundance by mass spectrometry (MS) leads to considerable challenges in modelling, archiving, exchanging or submitting experimental data sets as supplemental material to journals. To date, there has been no widely accepted format for capturing the evidence trail of how quantitative analysis has been performed by software, for transferring data between software packages or submitting to public databases. In the context of the Proteomics Standards Initiative, we have developed the mzQuantML data standard. The standard can represent quantitative data about regions in two-dimensional retention time versus mass/charge space (called features), peptides, proteins and protein groups - where there is ambiguity in peptide to protein inference, plus limited support for small molecule (metabolomic) data. The format has structures for representing replicate MS runs, grouping of replicates, for example as study variables, and for capturing the parameters used by software packages to arrive at these values. The format has capabilities to reference other standards such as mzML and mzIdentML and thus the evidence trail for the MS workflow as a whole can now be described. 
Several software implementations are available and we encourage other bioinformatics groups to use mzQuantML as an input, internal or output format for quantitative software and for structuring local repositories. All project resources are available in the public domain from http://www.psidev.info/mzquantml. |
de la Garza, Luis; Krüger, Jens; Schärfe, Charlotta; Röttig, Marc; Aiche, Stephan; Reinert, Knut; Kohlbacher, Oliver From the desktop to the grid: conversion of KNIME Workflows to gUSE Proc. IWSG 2013, 993 , CEUR-WS 2013. @conference{IWSG13KNIME, title = {From the desktop to the grid: conversion of KNIME Workflows to gUSE}, author = {Luis de la Garza and Jens Krüger and Charlotta Schärfe and Marc Röttig and Stephan Aiche and Knut Reinert and Oliver Kohlbacher}, url = {http://ceur-ws.org/Vol-993/}, year = {2013}, date = {2013-01-01}, booktitle = {Proc. IWSG 2013}, volume = {993}, pages = {9}, series = {CEUR-WS}, abstract = {The Konstanz Information Miner is a user-friendly graphical workflow designer with a broad user base in industry and academia. Its broad range of embedded tools and its powerful data mining and visualization tools render it ideal for scientific workflows. It is thus used more and more in a broad range of applications. However, the free version typically runs on a desktop computer, restricting users if they want to tap into computing power. The grid and cloud User Support Environment is a free and open source project created for parallelized and distributed systems, but the creation of workflows with the included components has a steeper learning curve. In this work we suggest an easy to implement solution combining the ease-of-use of the Konstanz Information Miner with the computational power of distributed computing infrastructures. We present a solution permitting the conversion of workflows between the two platforms. This enables a convenient development, debugging, and maintenance of scientific workflows on the desktop. These workflows can then be deployed on a cloud or grid, thus permitting large-scale computation. To achieve our goals, we relied on a Common Tool Description XML file format which describes the execution of arbitrary programs in a structured and easily readable and parseable way. 
In order to integrate external programs into KNIME, we employed the Generic KNIME Nodes extension.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } The Konstanz Information Miner is a user-friendly graphical workflow designer with a broad user base in industry and academia. Its broad range of embedded tools and its powerful data mining and visualization tools render it ideal for scientific workflows. It is thus used more and more in a broad range of applications. However, the free version typically runs on a desktop computer, restricting users if they want to tap into computing power. The grid and cloud User Support Environment is a free and open source project created for parallelized and distributed systems, but the creation of workflows with the included components has a steeper learning curve. In this work we suggest an easy to implement solution combining the ease-of-use of the Konstanz Information Miner with the computational power of distributed computing infrastructures. We present a solution permitting the conversion of workflows between the two platforms. This enables a convenient development, debugging, and maintenance of scientific workflows on the desktop. These workflows can then be deployed on a cloud or grid, thus permitting large-scale computation. To achieve our goals, we relied on a Common Tool Description XML file format which describes the execution of arbitrary programs in a structured and easily readable and parseable way. In order to integrate external programs into KNIME, we employed the Generic KNIME Nodes extension. |