Publications
Briesemeister, Sebastian; Rahnenführer, Jörg; Kohlbacher, Oliver Going from where to why - interpretable prediction of protein subcellular localization Bioinformatics, 26 (9), pp. 1232-1238, 2010. @article{YLoc-Bioinformatics-2010, title = {Going from where to why - interpretable prediction of protein subcellular localization}, author = {Sebastian Briesemeister and Jörg Rahnenführer and Oliver Kohlbacher}, url = {http://bioinformatics.oxfordjournals.org/cgi/content/abstract/btq115?ijkey=iAblSmDnFCB8WF2&keytype=ref}, year = {2010}, date = {2010-01-01}, journal = {Bioinformatics}, volume = {26}, number = {9}, pages = {1232-1238}, abstract = {Motivation: Protein subcellular localization is pivotal in understanding a protein’s function. Computational prediction of subcellular localization has become a viable alternative to experimental approaches. While current machine learning-based methods yield good prediction accuracy, most of them suffer from two key problems: lack of interpretability and dealing with multiple locations. Results: We present YLoc, a novel method for predicting protein subcellular localization that addresses these issues. Due to its simple architecture, YLoc can identify the relevant features of a protein sequence contributing to its subcellular localization, e.g., localization signals or motifs relevant to protein sorting. We present several example applications where YLoc identifies the sequence features responsible for protein localization and thus reveals not only to which location a protein is transported to, but also why it is transported there. YLoc also provides a confidence estimate for the prediction. The user can thus decide what level of error is acceptable for a prediction. Due to a probabilistic approach and the use of several thousands of dual-targeted proteins, YLoc is able to predict multiple locations per protein. 
YLoc was benchmarked using several independent datasets for protein subcellular localization and performs on a par with other state-of-the-art predictors. Disregarding low-confidence predictions, YLoc can achieve prediction accuracies of over 90%. Moreover, we show that YLoc is able to reliably predict multiple locations and outperforms the best predictors in this area. Availability: www.multiloc.org/YLoc.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Motivation: Protein subcellular localization is pivotal in understanding a protein’s function. Computational prediction of subcellular localization has become a viable alternative to experimental approaches. While current machine learning-based methods yield good prediction accuracy, most of them suffer from two key problems: lack of interpretability and dealing with multiple locations. Results: We present YLoc, a novel method for predicting protein subcellular localization that addresses these issues. Due to its simple architecture, YLoc can identify the relevant features of a protein sequence contributing to its subcellular localization, e.g., localization signals or motifs relevant to protein sorting. We present several example applications where YLoc identifies the sequence features responsible for protein localization and thus reveals not only to which location a protein is transported to, but also why it is transported there. YLoc also provides a confidence estimate for the prediction. The user can thus decide what level of error is acceptable for a prediction. Due to a probabilistic approach and the use of several thousands of dual-targeted proteins, YLoc is able to predict multiple locations per protein. YLoc was benchmarked using several independent datasets for protein subcellular localization and performs on a par with other state-of-the-art predictors. Disregarding low-confidence predictions, YLoc can achieve prediction accuracies of over 90%. 
Moreover, we show that YLoc is able to reliably predict multiple locations and outperforms the best predictors in this area. Availability: www.multiloc.org/YLoc. |
Briesemeister, Sebastian; Rahnenführer, Jörg; Kohlbacher, Oliver YLoc – an interpretable web server for predicting subcellular localization Nucl. Acids Res., 38 , pp. W497-W502, 2010. @article{YLoc-NAR, title = {YLoc – an interpretable web server for predicting subcellular localization}, author = {Sebastian Briesemeister and Jörg Rahnenführer and Oliver Kohlbacher}, url = {http://nar.oxfordjournals.org/content/38/suppl_2/W497.full}, year = {2010}, date = {2010-01-01}, journal = {Nucl. Acids Res.}, volume = {38}, pages = {W497-W502}, abstract = {Predicting subcellular localization has become a valuable alternative to time-consuming experimental methods. Major drawbacks of many of these predictors is their lack of interpretability and the fact that they do not provide an estimate of the confidence of an individual prediction. We present YLoc, an interpretable web server for predicting subcellular localization. YLoc uses natural language to explain why a prediction was made and which biological property of the protein was mainly responsible for it. In addition, YLoc estimates the reliability of its own predictions. YLoc can, thus, assist in understanding protein localization and in location engineering of proteins. The YLoc web server is available online at www.multiloc.org/YLoc.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Predicting subcellular localization has become a valuable alternative to time-consuming experimental methods. Major drawbacks of many of these predictors is their lack of interpretability and the fact that they do not provide an estimate of the confidence of an individual prediction. We present YLoc, an interpretable web server for predicting subcellular localization. YLoc uses natural language to explain why a prediction was made and which biological property of the protein was mainly responsible for it. In addition, YLoc estimates the reliability of its own predictions. 
YLoc can, thus, assist in understanding protein localization and in location engineering of proteins. The YLoc web server is available online at www.multiloc.org/YLoc. |
Widmer, Christian; Toussaint, Nora C; Altun, Yasemin; Kohlbacher, Oliver; Rätsch, Gunnar Novel Machine Learning Methods for MHC Class I Binding Prediction Pattern Recognition in Bioinformatics 5th IAPR International Conference, PRIB 2010, Nijmegen, The Netherlands, September 22-24, 2010. Proceedings, pp. 98-109, Springer, 2010. @inproceedings{PRIB2010, title = {Novel Machine Learning Methods for MHC Class I Binding Prediction}, author = {Christian Widmer and Nora C Toussaint and Yasemin Altun and Oliver Kohlbacher and Gunnar Rätsch}, url = {https://link.springer.com/chapter/10.1007/978-3-642-16001-1_9}, year = {2010}, date = {2010-01-01}, booktitle = {Pattern Recognition in Bioinformatics 5th IAPR International Conference, PRIB 2010, Nijmegen, The Netherlands, September 22-24, 2010. Proceedings}, volume = {6282}, pages = {98-109}, publisher = {Springer}, series = {Lecture Notes in Computer Science}, abstract = {MHC class I molecules are key players in the human immune system. They bind small peptides derived from intracellular proteins and present them on the cell surface for surveillance by the immune system. Prediction of such MHC class I binding peptides is a vital step in the design of peptide-based vaccines and therefore one of the major problems in computational immunology. Thousands of different types of MHC class I molecules exist, each displaying a distinct binding specificity. The lack of sufficient training data for the majority of these molecules hinders the application of Machine Learning to this problem. We propose two approaches to improve the predictive power of kernel-based Machine Learning methods for MHC class I binding prediction: First, a modification of the Weighted Degree string kernel that allows for the incorporation of amino acid properties. Second, we propose an enhanced Multitask kernel and an optimization procedure to fine-tune its kernel parameters. 
The combination of both approaches yields improved performance, which we demonstrate on the IEDB benchmark data set.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } MHC class I molecules are key players in the human immune system. They bind small peptides derived from intracellular proteins and present them on the cell surface for surveillance by the immune system. Prediction of such MHC class I binding peptides is a vital step in the design of peptide-based vaccines and therefore one of the major problems in computational immunology. Thousands of different types of MHC class I molecules exist, each displaying a distinct binding specificity. The lack of sufficient training data for the majority of these molecules hinders the application of Machine Learning to this problem. We propose two approaches to improve the predictive power of kernel-based Machine Learning methods for MHC class I binding prediction: First, a modification of the Weighted Degree string kernel that allows for the incorporation of amino acid properties. Second, we propose an enhanced Multitask kernel and an optimization procedure to fine-tune its kernel parameters. The combination of both approaches yields improved performance, which we demonstrate on the IEDB benchmark data set. |
Widmer, Christian; Altun, Yasemin; Toussaint, Nora C; Rätsch, Gunnar Inferring Latent Task Structure for Multi-Task Learning via Multiple Kernel Learning BMC Bioinformatics, 11 (Suppl 8), pp. S5, 2010. @article{WATR2010, title = {Inferring Latent Task Structure for Multi-Task Learning via Multiple Kernel Learning}, author = {Christian Widmer and Yasemin Altun and Nora C Toussaint and Gunnar Rätsch}, url = {http://www.biomedcentral.com/1471-2105/11/S8/S5}, year = {2010}, date = {2010-01-01}, journal = {BMC Bioinformatics}, volume = {11}, number = {Suppl 8}, pages = {S5}, abstract = {Background: The lack of sufficient training data is the limiting factor for many Machine Learning applications in Computational Biology. If data is available for several different but related problem domains, Multitask Learning algorithms can be used to learn a model based on all available information. In Bioinformatics, many problems can be cast into the Multitask Learning scenario by incorporating data from several organisms. However, combining information from several tasks requires the careful consideration of the degree of similarity between tasks. Our proposed method simultaneously learns (or refines) the similarity between tasks along with the Multitask Learning Classifier. This is done by formulating the Multitask Learning problem as Multiple Kernel Learning, using the recently published q-Norm MKL algorithm. Results: We demonstrate the performance of our method on two problems from Computational Biology. First, we show that our method is able to improve performance on a splice-site dataset with given hierarchical task structure by refining the task relationships. Second, we consider an MHC-I dataset, for which we assume no knowledge about the degree of task relatedness. Here, we are able to learn the task similarity from scratch along with the Multitask classifiers. In both cases, we clearly outperform baseline methods that we compare against. 
Conclusions: We present a novel approach to Multitask Learning that learns task similarity along with the classifiers. The framework is very general as it allows to incorporate prior knowledge about tasks relationships if available, but is also able to identify task similarities in absence of additional information. Both variants show promising results in applications from Computational Biology.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Background: The lack of sufficient training data is the limiting factor for many Machine Learning applications in Computational Biology. If data is available for several different but related problem domains, Multitask Learning algorithms can be used to learn a model based on all available information. In Bioinformatics, many problems can be cast into the Multitask Learning scenario by incorporating data from several organisms. However, combining information from several tasks requires the careful consideration of the degree of similarity between tasks. Our proposed method simultaneously learns (or refines) the similarity between tasks along with the Multitask Learning Classifier. This is done by formulating the Multitask Learning problem as Multiple Kernel Learning, using the recently published q-Norm MKL algorithm. Results: We demonstrate the performance of our method on two problems from Computational Biology. First, we show that our method is able to improve performance on a splice-site dataset with given hierarchical task structure by refining the task relationships. Second, we consider an MHC-I dataset, for which we assume no knowledge about the degree of task relatedness. Here, we are able to learn the task similarity from scratch along with the Multitask classifiers. In both cases, we clearly outperform baseline methods that we compare against. Conclusions: We present a novel approach to Multitask Learning that learns task similarity along with the classifiers. 
The framework is very general as it allows to incorporate prior knowledge about tasks relationships if available, but is also able to identify task similarities in absence of additional information. Both variants show promising results in applications from Computational Biology. |
Toussaint, Nora C; Widmer, Christian; Kohlbacher, Oliver; Rätsch, Gunnar Exploiting Physico-Chemical Properties in String Kernels BMC Bioinformatics, 11 (Suppl 8), pp. S7, 2010. @article{TWKR2010, title = {Exploiting Physico-Chemical Properties in String Kernels}, author = {Nora C Toussaint and Christian Widmer and Oliver Kohlbacher and Gunnar Rätsch}, url = {http://www.biomedcentral.com/1471-2105/11/S8/S7}, year = {2010}, date = {2010-01-01}, journal = {BMC Bioinformatics}, volume = {11}, number = {Suppl 8}, pages = {S7}, abstract = {Background: String kernels are commonly used for the classification of biological sequences, nucleotide as well as amino acid sequences. Although string kernels are already very powerful, when it comes to amino acids they have a major shortcoming. They ignore an important piece of information when comparing amino acids: the physico-chemical properties such as size, hydrophobicity, or charge. This information is very valuable, especially when training data is less abundant. There have been only very few approaches so far that aim at combining these two ideas. Results: We propose new string kernels that combine the benefits of physico-chemical descriptors for amino acids with the ones of string kernels. The benefits of the proposed kernels are assessed on two problems: MHC-peptide binding classification using position specific kernels and protein classification based on the substring spectrum of the sequences. Our experiments demonstrate that the incorporation of amino acid properties in string kernels yields improved performances compared to standard string kernels and to previously proposed non-substring kernels. Conclusions: In summary, the proposed modifications, in particular the combination with the RBF substring kernel, consistently yield improvements without affecting the computational complexity. The proposed kernels therefore appear to be the kernels of choice for any protein sequence-based inference. 
Availability: Data sets, code and additional information will be available from http://www.fml.mpg.de/raetsch/suppl/aask. The developed kernels will be part of the next Shogun toolbox release.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Background: String kernels are commonly used for the classification of biological sequences, nucleotide as well as amino acid sequences. Although string kernels are already very powerful, when it comes to amino acids they have a major shortcoming. They ignore an important piece of information when comparing amino acids: the physico-chemical properties such as size, hydrophobicity, or charge. This information is very valuable, especially when training data is less abundant. There have been only very few approaches so far that aim at combining these two ideas. Results: We propose new string kernels that combine the benefits of physico-chemical descriptors for amino acids with the ones of string kernels. The benefits of the proposed kernels are assessed on two problems: MHC-peptide binding classification using position specific kernels and protein classification based on the substring spectrum of the sequences. Our experiments demonstrate that the incorporation of amino acid properties in string kernels yields improved performances compared to standard string kernels and to previously proposed non-substring kernels. Conclusions: In summary, the proposed modifications, in particular the combination with the RBF substring kernel, consistently yield improvements without affecting the computational complexity. The proposed kernels therefore appear to be the kernels of choice for any protein sequence-based inference. Availability: Data sets, code and additional information will be available from http://www.fml.mpg.de/raetsch/suppl/aask. The developed kernels will be part of the next Shogun toolbox release. |
Gesing, Sandra; van Hemert, Jano; Koetsier, Jos; Bertsch, Andreas; Kohlbacher, Oliver TOPP goes Rapid—The OpenMS Proteomics Pipeline in a Grid-enabled Web Portal 10th IEEE/ACM International Conference on Cluster, Cloud, and Grid Computing (ccgrid 2010), pp. 598-599, IEEE/ACM ACM, 2010. @inproceedings{inproceedingsreference.2010-07-13.3868421791, title = {TOPP goes Rapid—The OpenMS Proteomics Pipeline in a Grid-enabled Web Portal}, author = {Sandra Gesing and Jano van Hemert and Jos Koetsier and Andreas Bertsch and Oliver Kohlbacher}, url = {http://www.computer.org/portal/web/csdl/doi/10.1109/CCGRID.2010.39}, year = {2010}, date = {2010-01-01}, booktitle = {10th IEEE/ACM International Conference on Cluster, Cloud, and Grid Computing (ccgrid 2010)}, pages = {598-599}, publisher = {ACM}, organization = {IEEE/ACM}, abstract = {Proteomics, the study of all the proteins contained in a particular sample, e.g., a cell, is a key technology in current biomedical research. The complexity and volume of proteomics data sets produced by mass spectrometric methods clearly suggests the use of grid-based high-performance computing for analysis. TOPP and OpenMS are open-source packages for proteomics data analysis, however, they do not provide support for Grid computing. In this work we present a portal interface for high-throughput data analysis with TOPP. The portal is based on Rapid, a tool for efficiently generating standardized portlets for a wide range of applications. The web-based interface allows the creation and editing of user-defined pipelines and their execution and monitoring on a Grid infrastructure. The portal also supports several file transfer protocols for data staging. 
It thus provides a simple and complete solution to high-throughput proteomics data analysis for inexperienced users through a convenient portal interface.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Proteomics, the study of all the proteins contained in a particular sample, e.g., a cell, is a key technology in current biomedical research. The complexity and volume of proteomics data sets produced by mass spectrometric methods clearly suggests the use of grid-based high-performance computing for analysis. TOPP and OpenMS are open-source packages for proteomics data analysis, however, they do not provide support for Grid computing. In this work we present a portal interface for high-throughput data analysis with TOPP. The portal is based on Rapid, a tool for efficiently generating standardized portlets for a wide range of applications. The web-based interface allows the creation and editing of user-defined pipelines and their execution and monitoring on a Grid infrastructure. The portal also supports several file transfer protocols for data staging. It thus provides a simple and complete solution to high-throughput proteomics data analysis for inexperienced users through a convenient portal interface. |
Zhang, Fajun; Weggler, Sophie; Ziller, Michael J; Ianeselli, Luca; Heck, B S; Hildebrandt, Andreas; Kohlbacher, Oliver; Skoda, Maxwell W A; Jacobs, Robert M J; Schreiber, Frank On the Universality of Protein Reentrant Condensation in Solution Induced by Multivalent Metal Ions PROTEINS: Structure, Function, Bioinformatics, 78 (16), pp. 3450–3457, 2010. @article{ZWZI2010, title = {On the Universality of Protein Reentrant Condensation in Solution Induced by Multivalent Metal Ions}, author = {Fajun Zhang and Sophie Weggler and Michael J Ziller and Luca Ianeselli and B S Heck and Andreas Hildebrandt and Oliver Kohlbacher and Maxwell W A Skoda and Robert M J Jacobs and Frank Schreiber}, url = {http://onlinelibrary.wiley.com/doi/10.1002/prot.22852}, year = {2010}, date = {2010-01-01}, journal = {PROTEINS: Structure, Function, Bioinformatics}, volume = {78}, number = {16}, pages = {3450–3457}, abstract = {The effective interactions and phase behavior of protein solutions under strong electrostatic coupling conditions are difficult to understand, due to the complex charge pattern and irregular geometry of protein surfaces. This distinguishes them from related systems such as DNA or conventional colloids. In this work, we discuss the question of universality of the reentrant condensation (RC) of proteins in solution induced by multivalent counterions, i.e. redissolution upon adding further salts after phase separation, as recently discovered (Zhang et al. Phys. Rev. Lett. 2008, 101, 148101). The discussion is based on a systematic investigation of five different proteins with different charge patterns and five different multivalent counterions. Zeta potential measurements confirm the effective charge inversion of proteins in the reentrant regime via binding of multivalent counterions, which is supported by Monte Carlo simulations. Charge inversion by trivalent cations requires an overall negative net charge of the protein. 
Statistical analysis of a representative set of protein sequences reveals that in theory this effect could be possible for about half of all proteins. Our results can be exploited for the control of the phase behavior of proteins, in particular facilitating protein crystallization.}, keywords = {}, pubstate = {published}, tppubtype = {article} } The effective interactions and phase behavior of protein solutions under strong electrostatic coupling conditions are difficult to understand, due to the complex charge pattern and irregular geometry of protein surfaces. This distinguishes them from related systems such as DNA or conventional colloids. In this work, we discuss the question of universality of the reentrant condensation (RC) of proteins in solution induced by multivalent counterions, i.e. redissolution upon adding further salts after phase separation, as recently discovered (Zhang et al. Phys. Rev. Lett. 2008, 101, 148101). The discussion is based on a systematic investigation of five different proteins with different charge patterns and five different multivalent counterions. Zeta potential measurements confirm the effective charge inversion of proteins in the reentrant regime via binding of multivalent counterions, which is supported by Monte Carlo simulations. Charge inversion by trivalent cations requires an overall negative net charge of the protein. Statistical analysis of a representative set of protein sequences reveals that in theory this effect could be possible for about half of all proteins. Our results can be exploited for the control of the phase behavior of proteins, in particular facilitating protein crystallization. |
Birkenheuer, Georg; Breuers, Sebastian; Brinkmann, Andre; Blunk, Dirk; Fels, Gregor; Gesing, Sandra; Herres-Pawlis, Sonja; Kohlbacher, Oliver; Krüger, Jens; Packschies, Lars Grid-Workflows in Molecular Science Lecture Notes in Informatics, pp. 177-184, 2010. @inproceedings{inproceedingsreference.2010-10-04.0034018898, title = {Grid-Workflows in Molecular Science}, author = {Georg Birkenheuer and Sebastian Breuers and Andre Brinkmann and Dirk Blunk and Gregor Fels and Sandra Gesing and Sonja Herres-Pawlis and Oliver Kohlbacher and Jens Krüger and Lars Packschies}, url = {http://subs.emis.de/LNI/Proceedings/Proceedings160/P-160.pdf}, year = {2010}, date = {2010-01-01}, booktitle = {Lecture Notes in Informatics}, number = {P-160}, pages = {177-184}, edition = {GI-Edition}, series = {Software-Engineering 2010}, abstract = {Computational Chemistry gathers information about properties of molecules based on compute intensive simulations. In this area, workflows are an essential instrument for managing complex simulation cascades. The aim of the MoSGrid project is an easy to use Grid integration of such workflows based on a portal that covers the complexity. This paper presents an initial general description of workflows for molecular science and details the result based on two examples for the integration of Gaussian and Gromacs.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Computational Chemistry gathers information about properties of molecules based on compute intensive simulations. In this area, workflows are an essential instrument for managing complex simulation cascades. The aim of the MoSGrid project is an easy to use Grid integration of such workflows based on a portal that covers the complexity. This paper presents an initial general description of workflows for molecular science and details the result based on two examples for the integration of Gaussian and Gromacs. |
Gesing, Sandra; Marton, Istvan; Birkenheuer, Georg; Schuller, Bernd; Grunzke, Richard; Krüger, Jens; Breuers, Sebastian; Blunk, Dirk; Fels, Gregor; Packschies, Lars; Brinkmann, Andre; Kohlbacher, Oliver; Kozlovszky, Miklos Workflow Interoperability in a Grid Portal for Molecular Simulations Proceedings of the International Workshop on Science Gateways (IWSG2010), pp. 44-48, Consorzio COMETA 2010. @inproceedings{IWSG-WFInterop, title = {Workflow Interoperability in a Grid Portal for Molecular Simulations}, author = {Sandra Gesing and Istvan Marton and Georg Birkenheuer and Bernd Schuller and Richard Grunzke and Jens Krüger and Sebastian Breuers and Dirk Blunk and Gregor Fels and Lars Packschies and Andre Brinkmann and Oliver Kohlbacher and Miklos Kozlovszky}, year = {2010}, date = {2010-01-01}, booktitle = {Proceedings of the International Workshop on Science Gateways (IWSG2010)}, pages = {44-48}, organization = {Consorzio COMETA}, abstract = {Molecular simulations are an invaluable tool in multiple research areas like chemistry, biology, and physics. The emerging MoSGrid (Molecular Simulation Grid) portal intends to integrate various molecular simulation tools in WS-PGRADE, a workflow-enabled grid portal. The portal will therefore also support the execution of workflows using these simulation codes. UNICORE is a grid middleware with the additional feature of an integrated workflow engine. Here we present a tool to invoke subtasks of a WS-PGRADE workflow in UNICORE and vice versa, to integrate existing UNICORE workflows in WS-PGRADE workflows. Researchers are enabled to create and use both kinds of grid workflows without the need of becoming acquainted to the grid infrastructure or the workflow languages.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Molecular simulations are an invaluable tool in multiple research areas like chemistry, biology, and physics. 
The emerging MoSGrid (Molecular Simulation Grid) portal intends to integrate various molecular simulation tools in WS-PGRADE, a workflow-enabled grid portal. The portal will therefore also support the execution of workflows using these simulation codes. UNICORE is a grid middleware with the additional feature of an integrated workflow engine. Here we present a tool to invoke subtasks of a WS-PGRADE workflow in UNICORE and vice versa, to integrate existing UNICORE workflows in WS-PGRADE workflows. Researchers are enabled to create and use both kinds of grid workflows without the need of becoming acquainted to the grid infrastructure or the workflow languages. |
Wewior, Martin; Packschies, Lars; Blunk, Dirk; Wickeroth, Daniel; Warzecha, Klaus; Herres-Pawlis, Sonja; Gesing, Sandra; Breuers, Sebastian; Krüger, Jens; Birkenheuer, Georg; Lang, Ulrich The MoSGrid Gaussian portlet -- Technologies for Implementation of Portlets for Molecular Simulations Proceedings of the International Workshop on Science Gateways (IWSG2010), pp. 39-43, Consorzio COMETA 2010. @inproceedings{IWSG10-G-Portlet, title = {The MoSGrid Gaussian portlet -- Technologies for Implementation of Portlets for Molecular Simulations}, author = {Martin Wewior and Lars Packschies and Dirk Blunk and Daniel Wickeroth and Klaus Warzecha and Sonja Herres-Pawlis and Sandra Gesing and Sebastian Breuers and Jens Krüger and Georg Birkenheuer and Ulrich Lang}, year = {2010}, date = {2010-01-01}, booktitle = {Proceedings of the International Workshop on Science Gateways (IWSG2010)}, pages = {39-43}, organization = {Consorzio COMETA}, abstract = {The development of a portlet for the MoSGrid (Molecular Simulation Grid) web portal is described. This portlet enables scientists in the field of Computational Chemistry to perform quantum chemical simulations on remote High Performance Computing platforms through a standards-compliant web browser. Since this portlet has a prototype character for further developments in MoSGrid and possibly beyond, the technologies used for its implementation were thoroughly evaluated and establish a common platform for future work.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } The development of a portlet for the MoSGrid (Molecular Simulation Grid) web portal is described. This portlet enables scientists in the field of Computational Chemistry to perform quantum chemical simulations on remote High Performance Computing platforms through a standards-compliant web browser. 
Since this portlet has a prototype character for further developments in MoSGrid and possibly beyond, the technologies used for its implementation were thoroughly evaluated and establish a common platform for future work. |
Hildebrandt, Andreas; Dehof, Anna Katharina; Rurainski, Alexander; Bertsch, Andreas; Schumann, Marcel; Toussaint, Nora C; Moll, Andreas; Stöckel, Daniel; Nickels, Stefan; Mueller, Sabine C; Lenhof, Hans-Peter; Kohlbacher, Oliver BALL - Biochemical Algorithms Library 1.3 BMC Bioinformatics, 11 , pp. 531, 2010. @article{BALL-1.3, title = {BALL - Biochemical Algorithms Library 1.3}, author = {Andreas Hildebrandt and Anna Katharina Dehof and Alexander Rurainski and Andreas Bertsch and Marcel Schumann and Nora C Toussaint and Andreas Moll and Daniel Stöckel and Stefan Nickels and Sabine C Mueller and Hans-Peter Lenhof and Oliver Kohlbacher}, doi = {https://doi.org/10.1186/1471-2105-11-531}, year = {2010}, date = {2010-01-01}, journal = {BMC Bioinformatics}, volume = {11}, pages = {531}, abstract = {Background The Biochemical Algorithms Library (BALL) is a comprehensive rapid application development framework for structural bioinformatics. It provides an extensive C++ class library of data structures and algorithms for molecular modeling and structural bioinformatics. Using BALL as a programming toolbox does not only allow to greatly reduce application development times but also helps in ensuring stability and correctness by avoiding the error-prone reimplementation of complex algorithms and replacing them with calls into the library that has been well-tested by a large number of developers. In the ten years since its original publication, BALL has seen a substantial increase in functionality and numerous other improvements. Results Here, we discuss BALL's current functionality and highlight the key additions and improvements: support for additional file formats, molecular edit-functionality, new molecular mechanics force fields, novel energy minimization techniques, docking algorithms, and support for cheminformatics and QSAR studies. Conclusions BALL is available for all major operating systems, including Linux, Windows, and MacOS X. 
It is available free of charge under the Lesser GNU Public License (LGPL). Parts of the code are distributed under the GNU Public License (GPL). BALL is available as source code and binary packages from the project web site at http://www.ball-project.org. Recently, it has been accepted into the debian project; integration into further distributions is currently pursued.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Background The Biochemical Algorithms Library (BALL) is a comprehensive rapid application development framework for structural bioinformatics. It provides an extensive C++ class library of data structures and algorithms for molecular modeling and structural bioinformatics. Using BALL as a programming toolbox does not only allow to greatly reduce application development times but also helps in ensuring stability and correctness by avoiding the error-prone reimplementation of complex algorithms and replacing them with calls into the library that has been well-tested by a large number of developers. In the ten years since its original publication, BALL has seen a substantial increase in functionality and numerous other improvements. Results Here, we discuss BALL's current functionality and highlight the key additions and improvements: support for additional file formats, molecular edit-functionality, new molecular mechanics force fields, novel energy minimization techniques, docking algorithms, and support for cheminformatics and QSAR studies. Conclusions BALL is available for all major operating systems, including Linux, Windows, and MacOS X. It is available free of charge under the Lesser GNU Public License (LGPL). Parts of the code are distributed under the GNU Public License (GPL). BALL is available as source code and binary packages from the project web site at http://www.ball-project.org. Recently, it has been accepted into the debian project; integration into further distributions is currently pursued. |
Henneges, Carsten; Röttig, Marc; Kohlbacher, Oliver; Zell, Andreas Graphlet Data Mining of Energetical Interaction Patterns in Protein 3D Structures Proceedings of the International Conference on Fuzzy Computation and 2nd International Conference on Neural Computation, SciTePress, 2010. @conference{GraphletPaperHennegesRoettig, title = {Graphlet Data Mining of Energetical Interaction Patterns in Protein 3D Structures}, author = {Carsten Henneges and Marc Röttig and Oliver Kohlbacher and Andreas Zell}, url = {http://dx.doi.org/10.5220/0003077501900195}, year = {2010}, date = {2010-01-01}, booktitle = {Proceedings of the International Conference on Fuzzy Computation and 2nd International Conference on Neural Computation}, pages = {190-195}, publisher = {SciTePress}, abstract = {Interactions between secondary structure elements (SSEs) in the core of proteins are evolutionary conserved and define the overall fold of proteins. They can thus be used to classify protein families. Using a graph representation of SSE interactions and data mining techniques we identify overrepresented graphlets that can be used for protein classification. We find, in total, 627 significant graphlets within the ICGEB Protein Benchmark database (SCOP40mini) and the Super-Secondary Structure database (SSSDB). Based on graphlets, decision trees are able to predict the four SCOP levels and SSSDB (sub)motif classes with a mean Area Under Curve (AUC) better than 0.89 (5-fold CV). Regularized decision trees reveal that for each classification task about 20 graphlets suffice for reliable predictions. Graphlets composed of five secondary structure interactions are most informative. 
Finally, we find that graphlets can be predicted from secondary structure using decision trees (5-fold CV) with a Matthews Correlation Coefficient (MCC) reaching up to 0.7.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Interactions between secondary structure elements (SSEs) in the core of proteins are evolutionary conserved and define the overall fold of proteins. They can thus be used to classify protein families. Using a graph representation of SSE interactions and data mining techniques we identify overrepresented graphlets that can be used for protein classification. We find, in total, 627 significant graphlets within the ICGEB Protein Benchmark database (SCOP40mini) and the Super-Secondary Structure database (SSSDB). Based on graphlets, decision trees are able to predict the four SCOP levels and SSSDB (sub)motif classes with a mean Area Under Curve (AUC) better than 0.89 (5-fold CV). Regularized decision trees reveal that for each classification task about 20 graphlets suffice for reliable predictions. Graphlets composed of five secondary structure interactions are most informative. Finally, we find that graphlets can be predicted from secondary structure using decision trees (5-fold CV) with a Matthews Correlation Coefficient (MCC) reaching up to 0.7. |
Trusch, A; Ehlert, S; Bertsch, A; Kohlbacher, O; Hildebrand, D; Schlüter, H; Tallarek, U Improved particle-packed HPLC/MS microchips for proteomic analysis J. Sep. Sci., 33 (21), pp. 3283-91, 2010. @article{MicrochipHPLC, title = {Improved particle-packed HPLC/MS microchips for proteomic analysis}, author = {A Trusch and S Ehlert and A Bertsch and O Kohlbacher and D Hildebrand and H Schlüter and U Tallarek}, doi = {https://doi.org/10.1002/jssc.201000474}, year = {2010}, date = {2010-01-01}, journal = {J. Sep. Sci.}, volume = {33}, number = {21}, pages = {3283-91}, abstract = {The influence of packing process parameters (packing pressure, application of ultrasound) and the stationary phase particle size (3.5 and 5 μm) on the chromatographic performance of HPLC/MS chips was systematically investigated for proteomic samples. First, reproducibility and detection limits of the separation were evaluated with a low-complexity sample of tryptic BSA peptides. The influence of adsorbent packing quality on protein identification was then tested with a typical proteomics sample of high complexity, a human plasma protein fraction (Cohn fraction IV-4). All HPLC/MS chips provided highly reproducible separations of these proteomic samples, but improved packing conditions and smaller particle sizes resulted in chromatograms with narrower peaks and correspondingly higher signal intensities. Improved separation performance increased the peak capacity, the number of identified peptides, and thus the sequence coverage in the proteomic samples, particularly for low sample amounts.}, keywords = {}, pubstate = {published}, tppubtype = {article} } The influence of packing process parameters (packing pressure, application of ultrasound) and the stationary phase particle size (3.5 and 5 μm) on the chromatographic performance of HPLC/MS chips was systematically investigated for proteomic samples. 
First, reproducibility and detection limits of the separation were evaluated with a low-complexity sample of tryptic BSA peptides. The influence of adsorbent packing quality on protein identification was then tested with a typical proteomics sample of high complexity, a human plasma protein fraction (Cohn fraction IV-4). All HPLC/MS chips provided highly reproducible separations of these proteomic samples, but improved packing conditions and smaller particle sizes resulted in chromatograms with narrower peaks and correspondingly higher signal intensities. Improved separation performance increased the peak capacity, the number of identified peptides, and thus the sequence coverage in the proteomic samples, particularly for low sample amounts. |
Neumann, Dirk; Kohlbacher, Oliver Structural Glycomics – Molecular Details of Protein-Carbohydrate Interactions and their Prediction Hicks, Martin G; Kettner, Carsten (Ed.): Proceedings of the International Beilstein Symposium on Glyco-Bioinformatics, Beilstein-Institut, Frankfurt, Germany, 2010. @inproceedings{Beilstein-GlycoBio2009, title = {Structural Glycomics – Molecular Details of Protein-Carbohydrate Interactions and their Prediction}, author = {Dirk Neumann and Oliver Kohlbacher}, editor = {Martin G Hicks and Carsten Kettner}, url = {http://beilstein-institut.de/glycobioinf2009/Proceedings/Kohlbacher/Kohlbacher.pdf}, year = {2010}, date = {2010-01-01}, booktitle = {Proceedings of the International Beilstein Symposium on Glyco-Bioinformatics}, publisher = {Beilstein-Institut, Frankfurt, Germany}, abstract = {Protein-ligand docking is an essential technique in computer-aided drug design. While generally available docking programs work well for most drug classes, carbohydrates and carbohydrate-like compounds are often problematic for docking. We discuss the peculiarities of protein-carbohydrate interactions and their impact on protein-carbohydrate docking and review the state of the art in docking of carbohydrates to proteins. Finally, we give an overview of carbohydrate docking studies and present a new docking method specifically designed to handle docking of carbohydrate-like compounds. BALLDock/SLICK combines an evolutionary docking algorithm for flexible ligands and flexible receptor side chains with carbohydrate-specific scoring and energy functions. The scoring function has been designed to identify accurate ligand poses, while the energy function yields accurate estimates of the binding free energies of these poses. 
On a test set of known protein-sugar complexes we demonstrate the ability of the approach to generate correct poses for almost all of the structures and achieve very low mean errors for the predicted binding free energies.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Protein-ligand docking is an essential technique in computer-aided drug design. While generally available docking programs work well for most drug classes, carbohydrates and carbohydrate-like compounds are often problematic for docking. We discuss the peculiarities of protein-carbohydrate interactions and their impact on protein-carbohydrate docking and review the state of the art in docking of carbohydrates to proteins. Finally, we give an overview of carbohydrate docking studies and present a new docking method specifically designed to handle docking of carbohydrate-like compounds. BALLDock/SLICK combines an evolutionary docking algorithm for flexible ligands and flexible receptor side chains with carbohydrate-specific scoring and energy functions. The scoring function has been designed to identify accurate ligand poses, while the energy function yields accurate estimates of the binding free energies of these poses. On a test set of known protein-sugar complexes we demonstrate the ability of the approach to generate correct poses for almost all of the structures and achieve very low mean errors for the predicted binding free energies. |
Venturelli, S; von Horn, K; Berger, A; Weiland, T; Smirnow, I; Schenk, A; Weiss, T S; Kämper, A; Kohlbacher, O; Gregor, M; Lauer, U M; Bitzer, M Identifikation und präklinische Charakterisierung neuartiger epigenetischer Wirkstoffe zur Behandlung therapieresistenter Tumore am Beispiel des Hepatozellulären Karzinoms Z Gastroenterol, 47 (06), pp. P1_13, 2009. @article{Venturelli2009, title = {Identifikation und präklinische Charakterisierung neuartiger epigenetischer Wirkstoffe zur Behandlung therapieresistenter Tumore am Beispiel des Hepatozellulären Karzinoms}, author = {S Venturelli and K von Horn and A Berger and T Weiland and I Smirnow and A Schenk and T S Weiss and A Kämper and O Kohlbacher and M Gregor and U M Lauer and M Bitzer}, year = {2009}, date = {2009-01-01}, journal = {Z Gastroenterol}, volume = {47}, number = {06}, pages = {P1_13}, abstract = {Fragestellung: Epigenetische Wirkstoffe sind in den letzten Jahren zunehmend in den Fokus innovativer Therapiekonzepte der Tumorforschung gerückt. Insbesondere Inhibitoren der zellulären Histondeacetylase (HDAC) zeigen ein breites Anwendungsspektrum bei gleichzeitig geringer Toxizität. Speziell Chemotherapie-resistente Tumore sprechen sensibel auf diese Wirkstoffklasse an; daher wird aktuell intensiv nach neuen Substanzen gesucht, die eine HDAC-inhibitorische Aktivität aufweisen. Methoden: Als Modell wurden humane Hepatomzelllinien (HepG2, Hep3B, HuH7) und primäre humane Hepatozyten (PHH) von verschiedenen Spendern eingesetzt. Die Substanzen wurden durch HDAC-Inhibitor-Assays, Docking-Analysen, SRB-Viabilitäts-Testung, Fluoresceindiacetat-Assays, Real-Time-Cell-Monitoring, LDH- und GOT-Bestimmungen und Western-Blotting charakterisiert. Ergebnisse: Es wurden initial verschiedene interessante chemische Strukturen auf eine potentielle epigenetische Wirkung hin untersucht. 
Bei zwei Abkömmlingen der Stoffgruppe der Hydroxaminsäuren konnte eine bis dato unbekannte HDAC-inhibitorische Aktivität am Computermodell identifiziert werden. Eine molekularbiologische Bestätigung erfolgte einerseits direkt mittels zellfreiem Testsystem und anhand einer zellulären globalen Histon-Hyperacetylierung. Die Inkubation von verschiedenen humanen Hepatomzelllinien mit diesen beiden neuen HDAC-Inhibitoren zeigte schon bei geringen Konzentrationen eine signifikant verminderte Proliferation bzw. Reduktion der Viabilität bei einem gleichzeitigen Anstieg des zellulären Toxizitätsparameters LDH. Interessanterweise ergaben detaillierte Toxizitätsanalysen mit PHH von verschiedenen Spender-Patienten auch bei hohen Konzentrationen keine bzw. nur geringfügige Hinweise auf eine zytotoxische Wirkung. Schlussfolgerung: Es konnte zum einen eine bisher unbekannte Klasse an HDAC-Inhibitoren identifiziert und somit das Spektrum epigenetischer Wirkstoffe erfolgreich erweitert werden. Zum anderen zeigte die Charakterisierung dieser Substanzen eine ausgewiesene antiproliferative Wirkung auf HCC-Tumorzellen bei gleichzeitig guter Verträglichkeit für nicht-maligne Leberzellen, so dass diese Derivate zukünftig entweder unverändert oder in chemisch modifizierter Form als hochpotente HDAC-Inhibitoren klinisch weiter evaluiert werden sollten.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Fragestellung: Epigenetische Wirkstoffe sind in den letzten Jahren zunehmend in den Fokus innovativer Therapiekonzepte der Tumorforschung gerückt. Insbesondere Inhibitoren der zellulären Histondeacetylase (HDAC) zeigen ein breites Anwendungsspektrum bei gleichzeitig geringer Toxizität. Speziell Chemotherapie-resistente Tumore sprechen sensibel auf diese Wirkstoffklasse an; daher wird aktuell intensiv nach neuen Substanzen gesucht, die eine HDAC-inhibitorische Aktivität aufweisen. 
Methoden: Als Modell wurden humane Hepatomzelllinien (HepG2, Hep3B, HuH7) und primäre humane Hepatozyten (PHH) von verschiedenen Spendern eingesetzt. Die Substanzen wurden durch HDAC-Inhibitor-Assays, Docking-Analysen, SRB-Viabilitäts-Testung, Fluoresceindiacetat-Assays, Real-Time-Cell-Monitoring, LDH- und GOT-Bestimmungen und Western-Blotting charakterisiert. Ergebnisse: Es wurden initial verschiedene interessante chemische Strukturen auf eine potentielle epigenetische Wirkung hin untersucht. Bei zwei Abkömmlingen der Stoffgruppe der Hydroxaminsäuren konnte eine bis dato unbekannte HDAC-inhibitorische Aktivität am Computermodell identifiziert werden. Eine molekularbiologische Bestätigung erfolgte einerseits direkt mittels zellfreiem Testsystem und anhand einer zellulären globalen Histon-Hyperacetylierung. Die Inkubation von verschiedenen humanen Hepatomzelllinien mit diesen beiden neuen HDAC-Inhibitoren zeigte schon bei geringen Konzentrationen eine signifikant verminderte Proliferation bzw. Reduktion der Viabilität bei einem gleichzeitigen Anstieg des zellulären Toxizitätsparameters LDH. Interessanterweise ergaben detaillierte Toxizitätsanalysen mit PHH von verschiedenen Spender-Patienten auch bei hohen Konzentrationen keine bzw. nur geringfügige Hinweise auf eine zytotoxische Wirkung. Schlussfolgerung: Es konnte zum einen eine bisher unbekannte Klasse an HDAC-Inhibitoren identifiziert und somit das Spektrum epigenetischer Wirkstoffe erfolgreich erweitert werden. Zum anderen zeigte die Charakterisierung dieser Substanzen eine ausgewiesene antiproliferative Wirkung auf HCC-Tumorzellen bei gleichzeitig guter Verträglichkeit für nicht-maligne Leberzellen, so dass diese Derivate zukünftig entweder unverändert oder in chemisch modifizierter Form als hochpotente HDAC-Inhibitoren klinisch weiter evaluiert werden sollten. |
Rätsch, Gunnar; Lohmann, Jan U; Kohlbacher, Oliver; Schultheiss, Sebastian J; Busch, Wolfgang KIRMES: kernel-based identification of regulatory modules in euchromatic sequences Bioinformatics, 25 (16), pp. 2126-2133, 2009, ISSN: 1367-4803. @article{10.1093/bioinformatics/btp278, title = {KIRMES: kernel-based identification of regulatory modules in euchromatic sequences}, author = {Gunnar Rätsch and Jan U Lohmann and Oliver Kohlbacher and Sebastian J Schultheiss and Wolfgang Busch}, url = {https://doi.org/10.1093/bioinformatics/btp278}, doi = {10.1093/bioinformatics/btp278}, issn = {1367-4803}, year = {2009}, date = {2009-01-01}, journal = {Bioinformatics}, volume = {25}, number = {16}, pages = {2126-2133}, abstract = {Motivation: Understanding transcriptional regulation is one of the main challenges in computational biology. An important problem is the identification of transcription factor (TF) binding sites in promoter regions of potential TF target genes. It is typically approached by position weight matrix-based motif identification algorithms using Gibbs sampling, or heuristics to extend seed oligos. Such algorithms succeed in identifying single, relatively well-conserved binding sites, but tend to fail when it comes to the identification of combinations of several degenerate binding sites, as those often found in cis-regulatory modules.Results: We propose a new algorithm that combines the benefits of existing motif finding with the ones of support vector machines (SVMs) to find degenerate motifs in order to improve the modeling of regulatory modules. 
In experiments on microarray data from Arabidopsis thaliana, we were able to show that the newly developed strategy significantly improves the recognition of TF targets. Availability: The python source code (open source-licensed under GPL), the data for the experiments and a Galaxy-based web service are available at http://www.fml.mpg.de/raetsch/suppl/kirmes/. Supplementary information: Supplementary data are available at Bioinformatics online.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Motivation: Understanding transcriptional regulation is one of the main challenges in computational biology. An important problem is the identification of transcription factor (TF) binding sites in promoter regions of potential TF target genes. It is typically approached by position weight matrix-based motif identification algorithms using Gibbs sampling, or heuristics to extend seed oligos. Such algorithms succeed in identifying single, relatively well-conserved binding sites, but tend to fail when it comes to the identification of combinations of several degenerate binding sites, as those often found in cis-regulatory modules. Results: We propose a new algorithm that combines the benefits of existing motif finding with the ones of support vector machines (SVMs) to find degenerate motifs in order to improve the modeling of regulatory modules. In experiments on microarray data from Arabidopsis thaliana, we were able to show that the newly developed strategy significantly improves the recognition of TF targets. Availability: The python source code (open source-licensed under GPL), the data for the experiments and a Galaxy-based web service are available at http://www.fml.mpg.de/raetsch/suppl/kirmes/. Supplementary information: Supplementary data are available at Bioinformatics online. |
Dräger, Andreas; Kronfeld, Marcel; Ziller, Michael J; Supper, Jochen; Planatscher, Hannes; Magnus, Jørgen B; Oldiges, Marco; Kohlbacher, Oliver; Zell, Andreas Modeling Metabolic Networks in C. glutamicum: A Comparison of Rate Laws in Combination with Various Parameter Optimization Strategies BMC Systems Biology, 3 , pp. 5, 2009. @article{ModMetaNetw, title = {Modeling Metabolic Networks in C. glutamicum: A Comparison of Rate Laws in Combination with Various Parameter Optimization Strategies}, author = {Andreas Dräger and Marcel Kronfeld and Michael J Ziller and Jochen Supper and Hannes Planatscher and Jørgen B Magnus and Marco Oldiges and Oliver Kohlbacher and Andreas Zell}, url = {http://www.biomedcentral.com/1752-0509/3/5}, year = {2009}, date = {2009-01-01}, journal = {BMC Systems Biology}, volume = {3}, pages = {5}, abstract = {Background To understand the dynamic behavior of cellular systems, mathematical modeling is often necessary and comprises three steps: (1) experimental measurement of participating molecules, (2) assignment of rate laws to each reaction, and (3) parameter calibration with respect to the measurements. In each of these steps the modeler is confronted with a plethora of alternative approaches, e. g., the selection of approximative rate laws in step two as specific equations are often unknown, or the choice of an estimation procedure with its specific settings in step three. This overall process with its numerous choices and the mutual influence between them makes it hard to single out the best modeling approach for a given problem. Results We investigate the modeling process using multiple kinetic equations together with various parameter optimization methods for a well-characterized example network, the biosynthesis of valine and leucine in C. glutamicum. For this purpose, we derive seven dynamic models based on generalized mass action, Michaelis-Menten and convenience kinetics as well as the stochastic Langevin equation. 
In addition, we introduce two modeling approaches for feedback inhibition to the mass action kinetics. The parameters of each model are estimated using eight optimization strategies. To determine the most promising modeling approaches together with the best optimization algorithms, we carry out a two-step benchmark: (1) coarse-grained comparison of the algorithms on all models and (2) fine-grained tuning of the best optimization algorithms and models. To analyze the space of the best parameters found for each model, we apply clustering, variance, and correlation analysis. Conclusion A mixed model based on the convenience rate law and the Michaelis-Menten equation, in which all reactions are assumed to be reversible, is the most suitable deterministic modeling approach followed by a reversible generalized mass action kinetics model. A Langevin model is advisable to take stochastic effects into account. To estimate the model parameters, three algorithms are particularly useful: For first attempts the settings-free Tribes algorithm yields valuable results. Particle swarm optimization and differential evolution provide significantly better results with appropriate settings.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Background To understand the dynamic behavior of cellular systems, mathematical modeling is often necessary and comprises three steps: (1) experimental measurement of participating molecules, (2) assignment of rate laws to each reaction, and (3) parameter calibration with respect to the measurements. In each of these steps the modeler is confronted with a plethora of alternative approaches, e. g., the selection of approximative rate laws in step two as specific equations are often unknown, or the choice of an estimation procedure with its specific settings in step three. This overall process with its numerous choices and the mutual influence between them makes it hard to single out the best modeling approach for a given problem. 
Results We investigate the modeling process using multiple kinetic equations together with various parameter optimization methods for a well-characterized example network, the biosynthesis of valine and leucine in C. glutamicum. For this purpose, we derive seven dynamic models based on generalized mass action, Michaelis-Menten and convenience kinetics as well as the stochastic Langevin equation. In addition, we introduce two modeling approaches for feedback inhibition to the mass action kinetics. The parameters of each model are estimated using eight optimization strategies. To determine the most promising modeling approaches together with the best optimization algorithms, we carry out a two-step benchmark: (1) coarse-grained comparison of the algorithms on all models and (2) fine-grained tuning of the best optimization algorithms and models. To analyze the space of the best parameters found for each model, we apply clustering, variance, and correlation analysis. Conclusion A mixed model based on the convenience rate law and the Michaelis-Menten equation, in which all reactions are assumed to be reversible, is the most suitable deterministic modeling approach followed by a reversible generalized mass action kinetics model. A Langevin model is advisable to take stochastic effects into account. To estimate the model parameters, three algorithms are particularly useful: For first attempts the settings-free Tribes algorithm yields valuable results. Particle swarm optimization and differential evolution provide significantly better results with appropriate settings. |
Mitschke, Jan; Fuss, Janina; Blum, Torsten; Höglund, Annette; Reski, Ralf; Kohlbacher, Oliver; Rensing, Stefan Prediction of dual protein targeting to plant organelles New Phytologist, 183 (1), pp. 224-236, 2009. @article{ATPb, title = {Prediction of dual protein targeting to plant organelles}, author = {Jan Mitschke and Janina Fuss and Torsten Blum and Annette Höglund and Ralf Reski and Oliver Kohlbacher and Stefan Rensing}, url = {http://dx.doi.org/10.1111/j.1469-8137.2009.02832.x}, year = {2009}, date = {2009-01-01}, journal = {New Phytologist}, volume = {183}, number = {1}, pages = {224-236}, abstract = {Dual targeting of proteins to more than one subcellular localization has been found in animals, in fungi and in plants. In the latter, ambiguous N-terminal targeting signals have been described that result in the protein being located in both mitochondria and plastids. We have developed ambiguous targeting predictor (ATP), a machine-learning implementation that classifies such ambiguous targeting signals. * Ambiguous targeting predictor is based on a support vector machine implementation that makes use of 12 different amino acid features. Prediction results were validated using fluorescent protein fusion. * Both in silico and in vivo evaluations demonstrate that ambiguous targeting predictor is useful for predicting dual targeting to mitochondria and plastids. Proteins that are targeted to both organelles by tandemly arrayed signals (so-called twin targeting) can be predicted by both ambiguous targeting predictor and a combination of single targeting prediction tools. Comparison of ambiguous targeting predictor with previous experimental approaches, as well as in silico approaches, shows good congruence. * Based on the prediction results, land plant genomes are expected to encode, on average, > 400 proteins that are located in mitochondria and plastids. 
Ambiguous targeting predictor is helpful for functional genome annotation and can be used as a tool to further our understanding about dual protein targeting and its evolution.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Dual targeting of proteins to more than one subcellular localization has been found in animals, in fungi and in plants. In the latter, ambiguous N-terminal targeting signals have been described that result in the protein being located in both mitochondria and plastids. We have developed ambiguous targeting predictor (ATP), a machine-learning implementation that classifies such ambiguous targeting signals. * Ambiguous targeting predictor is based on a support vector machine implementation that makes use of 12 different amino acid features. Prediction results were validated using fluorescent protein fusion. * Both in silico and in vivo evaluations demonstrate that ambiguous targeting predictor is useful for predicting dual targeting to mitochondria and plastids. Proteins that are targeted to both organelles by tandemly arrayed signals (so-called twin targeting) can be predicted by both ambiguous targeting predictor and a combination of single targeting prediction tools. Comparison of ambiguous targeting predictor with previous experimental approaches, as well as in silico approaches, shows good congruence. * Based on the prediction results, land plant genomes are expected to encode, on average, > 400 proteins that are located in mitochondria and plastids. Ambiguous targeting predictor is helpful for functional genome annotation and can be used as a tool to further our understanding about dual protein targeting and its evolution. |
Malisi, Christoph; Kohlbacher, Oliver; Höcker, Birte Automated scaffold selection for enzyme design PROTEINS: Structure, Function, and Bioinformatics, 77 (1), pp. 74-83, 2009. @article{ScaffoldSelection, title = {Automated scaffold selection for enzyme design}, author = {Christoph Malisi and Oliver Kohlbacher and Birte Höcker}, url = {http://dx.doi.org/10.1002/prot.22418}, year = {2009}, date = {2009-01-01}, journal = {PROTEINS: Structure, Function, and Bioinformatics}, volume = {77}, number = {1}, pages = {74-83}, abstract = {A major goal of computational protein design is the construction of novel functions on existing protein scaffolds. There the first question is which scaffold is suitable for a specific reaction. Given a set of catalytic residues and their spatial arrangement, one wants to identify a protein scaffold that can host this active site. Here, we present an algorithm called ScaffoldSelection that is able to rapidly search large sets of protein structures for potential attachment sites of an enzymatic motif. The method consists of two steps; it first identifies pairs of backbone positions in pocket-like regions. Then, it combines these to complete attachment sites using a graph theoretical approach. Identified matches are assessed for their ability to accommodate the substrate or transition state. A representative set of structures from the Protein Data Bank ( approximately 3500) was searched for backbone geometries that support the catalytic residues for 12 chemical reactions. Recapitulation of native active site geometries is used as a benchmark for the performance of the program. The native motif is identified in all 12 test cases, ranking it in the top percentile in 5 out of 12. The algorithm is fast and efficient, although dependent on the complexity of the motif. Comparisons to other methods show that ScaffoldSelection performs equally well in terms of accuracy and far better in terms of speed. 
Thus, ScaffoldSelection will aid future computational protein design experiments by preselecting protein scaffolds that are suitable for a specific reaction type and the introduction of a predefined amino acid motif.}, keywords = {}, pubstate = {published}, tppubtype = {article} } A major goal of computational protein design is the construction of novel functions on existing protein scaffolds. There the first question is which scaffold is suitable for a specific reaction. Given a set of catalytic residues and their spatial arrangement, one wants to identify a protein scaffold that can host this active site. Here, we present an algorithm called ScaffoldSelection that is able to rapidly search large sets of protein structures for potential attachment sites of an enzymatic motif. The method consists of two steps; it first identifies pairs of backbone positions in pocket-like regions. Then, it combines these to complete attachment sites using a graph theoretical approach. Identified matches are assessed for their ability to accommodate the substrate or transition state. A representative set of structures from the Protein Data Bank ( approximately 3500) was searched for backbone geometries that support the catalytic residues for 12 chemical reactions. Recapitulation of native active site geometries is used as a benchmark for the performance of the program. The native motif is identified in all 12 test cases, ranking it in the top percentile in 5 out of 12. The algorithm is fast and efficient, although dependent on the complexity of the motif. Comparisons to other methods show that ScaffoldSelection performs equally well in terms of accuracy and far better in terms of speed. Thus, ScaffoldSelection will aid future computational protein design experiments by preselecting protein scaffolds that are suitable for a specific reaction type and the introduction of a predefined amino acid motif. |
Toussaint, Nora C; Kohlbacher, Oliver OptiTope - A Web Server for the Selection of an Optimal Set of Peptides for Epitope-based Vaccines Nucl. Acids Res., 37 , pp. W617-22, 2009. @article{OptiTope, title = {OptiTope - A Web Server for the Selection of an Optimal Set of Peptides for Epitope-based Vaccines}, author = {Nora C Toussaint and Oliver Kohlbacher}, url = {http://dx.doi.org/10.1093/nar/gkp293}, year = {2009}, date = {2009-01-01}, journal = {Nucl. Acids Res.}, volume = {37}, pages = {W617-22}, abstract = {Epitope-based vaccines (EVs) have recently been attracting significant interest. They trigger an immune response by confronting the immune system with immunogenic peptides derived from, e.g. viral- or cancer-related proteins. Binding of these peptides to proteins from the major histocompatibility complex (MHC) is crucial for immune system activation. However, since the MHC is highly polymorphic, different patients typically bind different repertoires of peptides. Furthermore, economical and regulatory issues impose strong limitations on the number of peptides that can be included in an EV. Hence, it is crucial to identify the optimal set of peptides for a vaccine, given constraints such as MHC allele probabilities in the target population, peptide mutation rates and maximum number of selected peptides. OptiTope aims at assisting immunologists in this critical task. With OptiTope, we provide an easy-to-use tool to determine a provably optimal set of epitopes with respect to overall immunogenicity in a specific individual (personalized medicine) or a target population (e.g. a certain ethnic group). OptiTope is available at http://www.epitoolkit.org/optitope.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Epitope-based vaccines (EVs) have recently been attracting significant interest. They trigger an immune response by confronting the immune system with immunogenic peptides derived from, e.g. viral- or cancer-related proteins. 
Binding of these peptides to proteins from the major histocompatibility complex (MHC) is crucial for immune system activation. However, since the MHC is highly polymorphic, different patients typically bind different repertoires of peptides. Furthermore, economical and regulatory issues impose strong limitations on the number of peptides that can be included in an EV. Hence, it is crucial to identify the optimal set of peptides for a vaccine, given constraints such as MHC allele probabilities in the target population, peptide mutation rates and maximum number of selected peptides. OptiTope aims at assisting immunologists in this critical task. With OptiTope, we provide an easy-to-use tool to determine a provably optimal set of epitopes with respect to overall immunogenicity in a specific individual (personalized medicine) or a target population (e.g. a certain ethnic group). OptiTope is available at http://www.epitoolkit.org/optitope. |
Schultheiss, Sebastian J; Busch, Wolfgang; Lohmann, Jan U; Kohlbacher, Oliver; Rätsch, Gunnar KIRMES: Kernel-based identification of regulatory modules in euchromatic sequences Bioinformatics, 25 (16), pp. 2126-33, 2009. @article{KIRMES, title = {KIRMES: Kernel-based identification of regulatory modules in euchromatic sequences}, author = {Sebastian J Schultheiss and Wolfgang Busch and Jan U Lohmann and Oliver Kohlbacher and Gunnar Rätsch}, url = {http://dx.doi.org/10.1093/bioinformatics/btp278}, year = {2009}, date = {2009-01-01}, journal = {Bioinformatics}, volume = {25}, number = {16}, pages = {2126-33}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
Nahnsen, Sven; Nordheim, Alfred; Kohlbacher, Oliver A geometric matching approach improves throughput and accuracy in DIGE based proteomics Proceedings of the sixth International Workshop on Computational Systems Biology (WCSB 2009), pp. 123-126, Tampere International Center for Signal Processing WCSB 2009, 2009. @inproceedings{DIGE-Matching, title = {A geometric matching approach improves throughput and accuracy in DIGE based proteomics}, author = {Sven Nahnsen and Alfred Nordheim and Oliver Kohlbacher}, url = {http://www.cs.tut.fi/wcsb09/wcsb2009proceedings.pdf}, year = {2009}, date = {2009-01-01}, booktitle = {Proceedings of the sixth International Workshop on Computational Systems Biology (WCSB 2009)}, pages = {123-126}, publisher = {WCSB 2009}, organization = {Tampere International Center for Signal Processing}, series = {#48}, abstract = {Two-Dimensional Difference In Gel Electrophoresis (2DDIGE) is a powerful tool in quantitative proteomics. However, the matching of replication experiments remains very labor-intensive and hard to automate. We present a novel geometric approach based on landmarks - unambiguously identified proteins - for inter-gel matching where we transform the pairwise gel matching problem into a maximum weight matching problem. We then apply network flow algorithms to solve this problem optimally. The approach automatically selects proteins that are reproducibly regulated across several experiments without manual intervention. In a case study our method outperformed common commercially available tools. Source code is available upon request from the authors.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Two-Dimensional Difference In Gel Electrophoresis (2DDIGE) is a powerful tool in quantitative proteomics. However, the matching of replication experiments remains very labor-intensive and hard to automate. 
We present a novel geometric approach based on landmarks - unambiguously identified proteins - for inter-gel matching where we transform the pairwise gel matching problem into a maximum weight matching problem. We then apply network flow algorithms to solve this problem optimally. The approach automatically selects proteins that are reproducibly regulated across several experiments without manual intervention. In a case study our method outperformed common commercially available tools. Source code is available upon request from the authors. |
Sturm, Marc; Kohlbacher, Oliver TOPPView: An Open-Source Viewer for Mass Spectrometry Data J. Proteome Res., 8 (7), pp. 3760-3, 2009. @article{TOPPView, title = {TOPPView: An Open-Source Viewer for Mass Spectrometry Data}, author = {Marc Sturm and Oliver Kohlbacher}, url = {http://dx.doi.org/10.1021/pr900171m}, year = {2009}, date = {2009-01-01}, journal = {J. Proteome Res.}, volume = {8}, number = {7}, pages = {3760-3}, abstract = {Visualization of complex mass spectrometric data sets is becoming increasingly important in proteomics and metabolomics. We present TOPPView, an integrated data visualization and analysis tool for mass spectrometric data sets. TOPPView allows the visualization and comparison of individual mass spectra, two-dimensional LC-MS data sets and their accompanying metadata. By supporting standardized XML-based data exchange formats, data import is possible from any type of mass spectrometer. The integrated analysis tools of the OpenMS Proteomics Pipeline (TOPP) allow efficient data analysis from within TOPPView through a convenient graphical user interface. TOPPView runs on all major operating systems and is available free of charge under an open-source license at http://www.openms.de.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Visualization of complex mass spectrometric data sets is becoming increasingly important in proteomics and metabolomics. We present TOPPView, an integrated data visualization and analysis tool for mass spectrometric data sets. TOPPView allows the visualization and comparison of individual mass spectra, two-dimensional LC-MS data sets and their accompanying metadata. By supporting standardized XML-based data exchange formats, data import is possible from any type of mass spectrometer. The integrated analysis tools of the OpenMS Proteomics Pipeline (TOPP) allow efficient data analysis from within TOPPView through a convenient graphical user interface. 
TOPPView runs on all major operating systems and is available free of charge under an open-source license at http://www.openms.de. |
Pfeifer, Nico; Leinenbach, Andreas; Huber, Christian G; Kohlbacher, Oliver Improving Peptide Identification in Proteome Analysis by a Two-Dimensional Retention Time Filtering Approach J. Proteome Res., 8 (8), pp. 4109-15, 2009. @article{2DRTPred, title = {Improving Peptide Identification in Proteome Analysis by a Two-Dimensional Retention Time Filtering Approach}, author = {Nico Pfeifer and Andreas Leinenbach and Christian G Huber and Oliver Kohlbacher}, url = {http://dx.doi.org/10.1021/pr900064b}, year = {2009}, date = {2009-01-01}, journal = {J. Proteome Res.}, volume = {8}, number = {8}, pages = {4109-15}, abstract = {The combination of a two-dimensional peptide separation scheme based on reversed-phase and ion-pair reversed phase HPLC with a computational method to model and predict retention times in both dimensions is described. The algorithm utilizes statistical learning to establish a retention model from about 200 peptide retention times and their corresponding sequences. The application of retention time prediction to the peptides facilitated an increase in true positive peptide identifications upon lowering mass spectrometric scoring thresholds and concomitantly filtering out false positives on the basis of predicted retention times. An approximately 19% increase in the number of peptide identifications at a q-value of 0.01 was achievable in a whole proteome measurement.}, keywords = {}, pubstate = {published}, tppubtype = {article} } The combination of a two-dimensional peptide separation scheme based on reversed-phase and ion-pair reversed phase HPLC with a computational method to model and predict retention times in both dimensions is described. The algorithm utilizes statistical learning to establish a retention model from about 200 peptide retention times and their corresponding sequences. 
The application of retention time prediction to the peptides facilitated an increase in true positive peptide identifications upon lowering mass spectrometric scoring thresholds and concomitantly filtering out false positives on the basis of predicted retention times. An approximately 19% increase in the number of peptide identifications at a q-value of 0.01 was achievable in a whole proteome measurement. |
Feldhahn, Magdalena; Dönnes, Pierre; Thiel, Philipp; Kohlbacher, Oliver FRED - A Framework for T-cell Epitope Detection Bioinformatics, 25 (20), pp. 2758-9, 2009. @article{FRED, title = {FRED - A Framework for T-cell Epitope Detection}, author = {Magdalena Feldhahn and Pierre Dönnes and Philipp Thiel and Oliver Kohlbacher}, url = {http://dx.doi.org/10.1093/bioinformatics/btp409}, year = {2009}, date = {2009-01-01}, journal = {Bioinformatics}, volume = {25}, number = {20}, pages = {2758-9}, abstract = {SUMMARY: Over the last decade, immunoinformatics has made significant progress. Computational approaches, in particular the prediction of T-cell epitopes using machine learning methods, are at the core of modern vaccine design. Large-scale analyses and the integration or comparison of different methods become increasingly important. We have developed FRED, an extendable, open source software framework for key tasks in immunoinformatics. In this, its first version, FRED offers easily accessible prediction methods for MHC binding and antigen processing as well as general infrastructure for the handling of antigen sequence data and epitopes. FRED is implemented in Python in a modular way and allows the integration of external methods. AVAILABILITY: FRED is freely available for download at http://www-bs.informatik.uni-tuebingen.de/Software/FRED. CONTACT: .}, keywords = {}, pubstate = {published}, tppubtype = {article} } SUMMARY: Over the last decade, immunoinformatics has made significant progress. Computational approaches, in particular the prediction of T-cell epitopes using machine learning methods, are at the core of modern vaccine design. Large-scale analyses and the integration or comparison of different methods become increasingly important. We have developed FRED, an extendable, open source software framework for key tasks in immunoinformatics. 
In this, its first version, FRED offers easily accessible prediction methods for MHC binding and antigen processing as well as general infrastructure for the handling of antigen sequence data and epitopes. FRED is implemented in Python in a modular way and allows the integration of external methods. AVAILABILITY: FRED is freely available for download at http://www-bs.informatik.uni-tuebingen.de/Software/FRED. CONTACT: . |
Bertsch, Andreas; Leinenbach, Andreas; Pervukhin, Anton; Lubeck, Markus; Hartmer, Ralf; Baessmann, Carsten; Elnakady, Yasser Abbas; Müller, Rolf; Böcker, Sebastian; Huber, Christian G; Kohlbacher, Oliver De novo peptide sequencing by tandem MS using complementary CID and electron transfer dissociation Electrophoresis, 30 (21), pp. 3736-3747, 2009. @article{CompNovo, title = {De novo peptide sequencing by tandem MS using complementary CID and electron transfer dissociation}, author = {Andreas Bertsch and Andreas Leinenbach and Anton Pervukhin and Markus Lubeck and Ralf Hartmer and Carsten Baessmann and Yasser Abbas Elnakady and Rolf Müller and Sebastian Böcker and Christian G Huber and Oliver Kohlbacher}, url = {http://www3.interscience.wiley.com/journal/122665016/abstract}, year = {2009}, date = {2009-01-01}, journal = {Electrophoresis}, volume = {30}, number = {21}, pages = {3736-3747}, abstract = {De novo sequencing of peptides using tandem MS is difficult due to missing fragment ions in the spectra commonly obtained after CID of peptide precursor ions. Complementing CID spectra with spectra obtained in an ion-trap mass spectrometer upon electron transfer dissociation (ETD) significantly increases the sequence coverage with diagnostic ions. In the de novo sequencing algorithm CompNovo presented here, a divide-and-conquer approach was combined with an efficient mass decomposition algorithm to exploit the complementary information contained in CID and ETD spectra. After optimizing the parameters for the algorithm on a well-defined training data set obtained for peptides from nine known proteins, the CompNovo algorithm was applied to the de novo sequencing of peptides derived from a whole protein extract of Sorangium cellulosum bacteria. To 2406 pairs of CID and ETD spectra contained in this data set, 675 fully correct sequences were assigned, which represent a success rate of 28.1%. 
It is shown that the CompNovo algorithm yields significantly improved sequencing accuracy as compared with published approaches using only CID spectra or combined CID and ETD spectra.}, keywords = {}, pubstate = {published}, tppubtype = {article} } De novo sequencing of peptides using tandem MS is difficult due to missing fragment ions in the spectra commonly obtained after CID of peptide precursor ions. Complementing CID spectra with spectra obtained in an ion-trap mass spectrometer upon electron transfer dissociation (ETD) significantly increases the sequence coverage with diagnostic ions. In the de novo sequencing algorithm CompNovo presented here, a divide-and-conquer approach was combined with an efficient mass decomposition algorithm to exploit the complementary information contained in CID and ETD spectra. After optimizing the parameters for the algorithm on a well-defined training data set obtained for peptides from nine known proteins, the CompNovo algorithm was applied to the de novo sequencing of peptides derived from a whole protein extract of Sorangium cellulosum bacteria. To 2406 pairs of CID and ETD spectra contained in this data set, 675 fully correct sequences were assigned, which represent a success rate of 28.1%. It is shown that the CompNovo algorithm yields significantly improved sequencing accuracy as compared with published approaches using only CID spectra or combined CID and ETD spectra. |
Keller, Andreas; Backes, Christina; Gerasch, Andreas; Kaufmann, Michael; Kohlbacher, Oliver; Lenhof, Hans-Peter A novel algorithm for detecting differentially regulated paths based on Gene Set Enrichment Analysis Bioinformatics, 25 (21), pp. 2787-94, 2009. @article{FiDePa, title = {A novel algorithm for detecting differentially regulated paths based on Gene Set Enrichment Analysis}, author = {Andreas Keller and Christina Backes and Andreas Gerasch and Michael Kaufmann and Oliver Kohlbacher and Hans-Peter Lenhof}, url = {http://bioinformatics.oxfordjournals.org/cgi/content/full/25/21/2787}, year = {2009}, date = {2009-01-01}, journal = {Bioinformatics}, volume = {25}, number = {21}, pages = {2787-94}, abstract = {MOTIVATION: Deregulated signaling cascades are known to play a crucial role in many pathogenic processes, among them are tumor initiation and progression. In the recent past, modern experimental techniques that allow for measuring the amount of mRNA transcripts of almost all known human genes in a tissue or even in a single cell have opened new avenues for studying the activity of the signaling cascades and for understanding the information flow in the networks. RESULTS: We present a novel dynamic programming algorithm for detecting deregulated signaling cascades. The so-called FiDePa (Finding Deregulated Paths) algorithm interprets differences in the expression profiles of tumor and normal tissues. It relies on the well-known gene set enrichment analysis (GSEA) and efficiently detects all paths in a given regulatory or signaling network that are significantly enriched with differentially expressed genes or proteins. Since our algorithm allows for comparing a single tumor expression profile with the control group, it facilitates the detection of specific regulatory features of a tumor that may help to optimize tumor therapy. 
To demonstrate the capabilities of our algorithm, we analyzed a glioma expression dataset with respect to a directed graph that combined the regulatory networks of the KEGG and TRANSPATH database. The resulting glioma consensus network that encompasses all detected deregulated paths contained many genes and pathways that are known to be key players in glioma or cancer-related pathogenic processes. Moreover, we were able to correlate clinically relevant features like necrosis or metastasis with the detected paths. AVAILABILITY: C++ source code is freely available, BiNA can be downloaded from http://www.bnplusplus.org/. CONTACT: }, keywords = {}, pubstate = {published}, tppubtype = {article} } MOTIVATION: Deregulated signaling cascades are known to play a crucial role in many pathogenic processes, among them are tumor initiation and progression. In the recent past, modern experimental techniques that allow for measuring the amount of mRNA transcripts of almost all known human genes in a tissue or even in a single cell have opened new avenues for studying the activity of the signaling cascades and for understanding the information flow in the networks. RESULTS: We present a novel dynamic programming algorithm for detecting deregulated signaling cascades. The so-called FiDePa (Finding Deregulated Paths) algorithm interprets differences in the expression profiles of tumor and normal tissues. It relies on the well-known gene set enrichment analysis (GSEA) and efficiently detects all paths in a given regulatory or signaling network that are significantly enriched with differentially expressed genes or proteins. Since our algorithm allows for comparing a single tumor expression profile with the control group, it facilitates the detection of specific regulatory features of a tumor that may help to optimize tumor therapy. 
To demonstrate the capabilities of our algorithm, we analyzed a glioma expression dataset with respect to a directed graph that combined the regulatory networks of the KEGG and TRANSPATH database. The resulting glioma consensus network that encompasses all detected deregulated paths contained many genes and pathways that are known to be key players in glioma or cancer-related pathogenic processes. Moreover, we were able to correlate clinically relevant features like necrosis or metastasis with the detected paths. AVAILABILITY: C++ source code is freely available, BiNA can be downloaded from http://www.bnplusplus.org/. CONTACT: |
Blum, Torsten; Briesemeister, Sebastian; Kohlbacher, Oliver MultiLoc2: integrating phylogeny and Gene Ontology terms improves subcellular protein localization prediction BMC Bioinformatics, 10 , pp. 274, 2009. @article{MultiLoc2, title = {MultiLoc2: integrating phylogeny and Gene Ontology terms improves subcellular protein localization prediction}, author = {Torsten Blum and Sebastian Briesemeister and Oliver Kohlbacher}, url = {http://dx.doi.org/10.1186/1471-2105-10-274}, year = {2009}, date = {2009-01-01}, journal = {BMC Bioinformatics}, volume = {10}, pages = {274}, abstract = {Background Knowledge of subcellular localization of proteins is crucial to proteomics, drug target discovery and systems biology since localization and biological function are highly correlated. In recent years, numerous computational prediction methods have been developed. Nevertheless, there is still a need for prediction methods that show more robustness and higher accuracy. Results We extended our previous MultiLoc predictor by incorporating phylogenetic profiles and Gene Ontology terms. Two different datasets were used for training the system, resulting in two versions of this high-accuracy prediction method. One version is specialized for globular proteins and predicts up to five localizations, whereas a second version covers all eleven main eukaryotic subcellular localizations. In a benchmark study with five localizations, MultiLoc2 performs considerably better than other methods for animal and plant proteins and comparably for fungal proteins. Furthermore, MultiLoc2 performs clearly better when using a second dataset that extends the benchmark study to all eleven main eukaryotic subcellular localizations. Conclusion MultiLoc2 is an extensive high-performance subcellular protein localization prediction system. By incorporating phylogenetic profiles and Gene Ontology terms MultiLoc2 yields higher accuracies compared to its previous version. 
Moreover, it outperforms other prediction systems in two benchmark studies. MultiLoc2 is available as a user-friendly and free web-service, available at: http://www-bs.informatik.uni-tuebingen.de/Services/MultiLoc2}, keywords = {}, pubstate = {published}, tppubtype = {article} } Background Knowledge of subcellular localization of proteins is crucial to proteomics, drug target discovery and systems biology since localization and biological function are highly correlated. In recent years, numerous computational prediction methods have been developed. Nevertheless, there is still a need for prediction methods that show more robustness and higher accuracy. Results We extended our previous MultiLoc predictor by incorporating phylogenetic profiles and Gene Ontology terms. Two different datasets were used for training the system, resulting in two versions of this high-accuracy prediction method. One version is specialized for globular proteins and predicts up to five localizations, whereas a second version covers all eleven main eukaryotic subcellular localizations. In a benchmark study with five localizations, MultiLoc2 performs considerably better than other methods for animal and plant proteins and comparably for fungal proteins. Furthermore, MultiLoc2 performs clearly better when using a second dataset that extends the benchmark study to all eleven main eukaryotic subcellular localizations. Conclusion MultiLoc2 is an extensive high-performance subcellular protein localization prediction system. By incorporating phylogenetic profiles and Gene Ontology terms MultiLoc2 yields higher accuracies compared to its previous version. Moreover, it outperforms other prediction systems in two benchmark studies. MultiLoc2 is available as a user-friendly and free web-service, available at: http://www-bs.informatik.uni-tuebingen.de/Services/MultiLoc2 |
Schneeberger, Korbinian; Hagmann, Jörg; Ossowski, Stephan; Warthmann, Norman; Gesing, Sandra; Kohlbacher, Oliver; Weigel, Detlef Simultaneous alignment of short reads against multiple genomes Genome Biology, 10 (9), pp. R98, 2009. @article{GenomeMapper, title = {Simultaneous alignment of short reads against multiple genomes}, author = {Korbinian Schneeberger and Jörg Hagmann and Stephan Ossowski and Norman Warthmann and Sandra Gesing and Oliver Kohlbacher and Detlef Weigel}, url = {http://dx.doi.org/10.1186/gb-2009-10-9-r98}, year = {2009}, date = {2009-01-01}, journal = {Genome Biology}, volume = {10}, number = {9}, pages = {R98}, abstract = {ABSTRACT: Genome resequencing with short reads generally relies on alignments against a single reference. GenomeMapper supports simultaneous mapping of short reads against multiple genomes by integrating related genomes (e.g., individuals of the same species) into a single graph structure. It constitutes the first approach for handling multiple references, and introduces representations for alignments against complex structures. Demonstrated benefits include access to polymorphisms that cannot be identified by alignments against the reference alone. Download GenomeMapper at http://1001genomes.org.}, keywords = {}, pubstate = {published}, tppubtype = {article} } ABSTRACT: Genome resequencing with short reads generally relies on alignments against a single reference. GenomeMapper supports simultaneous mapping of short reads against multiple genomes by integrating related genomes (e.g., individuals of the same species) into a single graph structure. It constitutes the first approach for handling multiple references, and introduces representations for alignments against complex structures. Demonstrated benefits include access to polymorphisms that cannot be identified by alignments against the reference alone. Download GenomeMapper at http://1001genomes.org. |
Böcker, Sebastian; Briesemeister, Sebastian; Klau, Gunnar W On Optimal Comparability Editing with Applications to Molecular Diagnostics BMC Bioinformatics, 10 (Suppl 1), pp. S61, 2009. @article{boecker08comparability, title = {On Optimal Comparability Editing with Applications to Molecular Diagnostics}, author = {Sebastian Böcker and Sebastian Briesemeister and Gunnar W Klau}, url = {http://dx.doi.org/10.1186/1471-2105-10-S1-S61}, year = {2009}, date = {2009-01-01}, journal = {BMC Bioinformatics}, volume = {10}, number = {Suppl 1}, pages = {S61}, abstract = {Background The COMPARABILITY EDITING problem appears in the context of hierarchical disease classification based on noisy data. We are given a directed graph G representing hierarchical relationships between patient subgroups. The task is to identify the minimum number of edge insertions or deletions to transform G into a transitive graph, that is, if edges (u, v) and (v, w) are present then edge (u, w) must be present, too. Results We present two new approaches for the problem based on fixed-parameter algorithmics and integer linear programming. In contrast to previously used heuristics, our approaches compute provably optimal solutions. Conclusion Our computational results demonstrate that our exact algorithms are by far more efficient in practice than a previously used heuristic approach. In addition to the superior running time performance, our algorithms are capable of enumerating all optimal solutions, and naturally solve the weighted version of the problem.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Background The COMPARABILITY EDITING problem appears in the context of hierarchical disease classification based on noisy data. We are given a directed graph G representing hierarchical relationships between patient subgroups. 
The task is to identify the minimum number of edge insertions or deletions to transform G into a transitive graph, that is, if edges (u, v) and (v, w) are present then edge (u, w) must be present, too. Results We present two new approaches for the problem based on fixed-parameter algorithmics and integer linear programming. In contrast to previously used heuristics, our approaches compute provably optimal solutions. Conclusion Our computational results demonstrate that our exact algorithms are by far more efficient in practice than a previously used heuristic approach. In addition to the superior running time performance, our algorithms are capable of enumerating all optimal solutions, and naturally solve the weighted version of the problem. |
Böcker, Sebastian; Briesemeister, Sebastian; Bui, Quang BaoAnh; Truss, Anke Going Weighted: Parameterized Algorithms for Cluster Editing Theoretical Computer Science, 410 (52), pp. 5467-5480, 2009, (This is an extended version of two articles published in the Proc. of the 6th Asia Pacific Bioinformatics Conference, APBC 2008, in: Series on Advances in Bioinformatics and Computational Biology, vol. 5, Imperial College Press, pp. 211–220; and Proc. of the 2nd Conference on Combinatorial Optimization and Applications, COCOA 2008, in: LNCS, vol. 5038, Springer, pp. 289–302.). @article{boecker09going, title = {Going Weighted: Parameterized Algorithms for Cluster Editing}, author = {Sebastian Böcker and Sebastian Briesemeister and Quang BaoAnh Bui and Anke Truss}, url = {http://dx.doi.org/10.1016/j.tcs.2009.05.006}, year = {2009}, date = {2009-01-01}, journal = {Theoretical Computer Science}, volume = {410}, number = {52}, pages = {5467-5480}, abstract = {The goal of the Cluster Editing problem is to make the fewest changes to the edge set of an input graph such that the resulting graph is a disjoint union of cliques. This problem is NP-complete but recently, several parameterized algorithms have been proposed. In this paper, we present a number of surprisingly simple search tree algorithms for Weighted Cluster Editing assuming that edge insertion and deletion costs are positive integers. We show that the smallest search tree has size O(1.82^k) for edit cost k, resulting in the currently fastest parameterized algorithm, both for this problem and its unweighted counterpart. We have implemented and compared our algorithms, and achieved promising results.}, note = {This is an extended version of two articles published in the Proc. of the 6th Asia Pacific Bioinformatics Conference, APBC 2008, in: Series on Advances in Bioinformatics and Computational Biology, vol. 5, Imperial College Press, pp. 211–220; and Proc. 
of the 2nd Conference on Combinatorial Optimization and Applications, COCOA 2008, in: LNCS, vol. 5038, Springer, pp. 289–302.}, keywords = {}, pubstate = {published}, tppubtype = {article} } The goal of the Cluster Editing problem is to make the fewest changes to the edge set of an input graph such that the resulting graph is a disjoint union of cliques. This problem is NP-complete but recently, several parameterized algorithms have been proposed. In this paper, we present a number of surprisingly simple search tree algorithms for Weighted Cluster Editing assuming that edge insertion and deletion costs are positive integers. We show that the smallest search tree has size O(1.82^k) for edit cost k, resulting in the currently fastest parameterized algorithm, both for this problem and its unweighted counterpart. We have implemented and compared our algorithms, and achieved promising results. |
Toussaint, Nora C; Kohlbacher, Oliver Towards in silico design of epitope-based vaccines Expert Opinion on Drug Discovery, 4 (10), pp. 1047-1060, 2009. @article{ReviewEVDes, title = {Towards in silico design of epitope-based vaccines}, author = {Nora C Toussaint and Oliver Kohlbacher}, url = {http://dx.doi.org/10.1517/17460440903242283}, year = {2009}, date = {2009-01-01}, journal = {Expert Opinion on Drug Discovery}, volume = {4}, number = {10}, pages = {1047-1060}, abstract = {Background: Epitope-based vaccines (EVs) make use of immunogenic peptides (epitopes) to trigger an immune response. Due to their manifold advantages, EVs have recently been attracting growing interest. The success of an EV is determined by the choice of epitopes used as a basis. However, the experimental discovery of candidate epitopes is expensive in terms of time and money. Furthermore, for the final choice of epitopes various immunological requirements have to be considered. Methods: Numerous in silico approaches exist that can guide the design of EVs. In particular, computational methods for MHC binding prediction have already become standard tools in immunology. Apart from binding prediction and prediction of antigen processing, methods for epitope design and selection have been suggested. We review these in silico approaches for epitope discovery and selection along with their strengths and weaknesses. Finally, we discuss some of the obvious problems in the design of EVs. Conclusion: State-of-the-art in silico approaches to MHC binding prediction yield high accuracies. However, a more thorough understanding of the underlying biological processes and significant amounts of experimental data will be required for the validation and improvement of in silico approaches to the remaining aspects of EV design.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Background: Epitope-based vaccines (EVs) make use of immunogenic peptides (epitopes) to trigger an immune response. 
Due to their manifold advantages, EVs have recently been attracting growing interest. The success of an EV is determined by the choice of epitopes used as a basis. However, the experimental discovery of candidate epitopes is expensive in terms of time and money. Furthermore, for the final choice of epitopes various immunological requirements have to be considered. Methods: Numerous in silico approaches exist that can guide the design of EVs. In particular, computational methods for MHC binding prediction have already become standard tools in immunology. Apart from binding prediction and prediction of antigen processing, methods for epitope design and selection have been suggested. We review these in silico approaches for epitope discovery and selection along with their strengths and weaknesses. Finally, we discuss some of the obvious problems in the design of EVs. Conclusion: State-of-the-art in silico approaches to MHC binding prediction yield high accuracies. However, a more thorough understanding of the underlying biological processes and significant amounts of experimental data will be required for the validation and improvement of in silico approaches to the remaining aspects of EV design. |
Althaus, Ernst; Klau, Gunnar W; Kohlbacher, Oliver; Lenhof, Hans-Peter; Reinert, Knut Integer Linear Programming in Computational Biology Efficient Algorithms, pp. 199-218, Springer, 2009. @inproceedings{DBLP-conf-birthday-AlthausKKLR09, title = {Integer Linear Programming in Computational Biology}, author = {Ernst Althaus and Gunnar W Klau and Oliver Kohlbacher and Hans-Peter Lenhof and Knut Reinert}, url = {http://dx.doi.org/10.1007/978-3-642-03456-5_14}, year = {2009}, date = {2009-01-01}, booktitle = {Efficient Algorithms}, volume = {5760}, pages = {199-218}, publisher = {Springer}, series = {Lecture Notes in Computer Science}, abstract = {Computational molecular biology (bioinformatics) is a young research field that is rich in NP-hard optimization problems. The problem instances encountered are often huge and comprise thousands of variables. Since their introduction into the field of bioinformatics in 1997, integer linear programming (ILP) techniques have been successfully applied to many optimization problems. These approaches have added much momentum to development and progress in related areas. In particular, ILP-based approaches have become a standard optimization technique in bioinformatics. In this review, we present applications of ILP-based techniques developed by members and former members of Kurt Mehlhorn’s group. These techniques were introduced to bioinformatics in a series of papers and popularized by demonstration of their effectiveness and potential.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Computational molecular biology (bioinformatics) is a young research field that is rich in NP-hard optimization problems. The problem instances encountered are often huge and comprise thousands of variables. Since their introduction into the field of bioinformatics in 1997, integer linear programming (ILP) techniques have been successfully applied to many optimization problems. 
These approaches have added much momentum to development and progress in related areas. In particular, ILP-based approaches have become a standard optimization technique in bioinformatics. In this review, we present applications of ILP-based techniques developed by members and former members of Kurt Mehlhorn’s group. These techniques were introduced to bioinformatics in a series of papers and popularized by demonstration of their effectiveness and potential. |
Briesemeister, Sebastian; Blum, Torsten; Brady, Scott; Lam, Yin; Kohlbacher, Oliver; Shatkay, Hagit SherLoc2: a high-accuracy hybrid method for predicting subcellular localization of proteins J. Proteome Res., 8 (11), pp. 5363–5366, 2009. @article{SherLoc2, title = {SherLoc2: a high-accuracy hybrid method for predicting subcellular localization of proteins}, author = {Sebastian Briesemeister and Torsten Blum and Scott Brady and Yin Lam and Oliver Kohlbacher and Hagit Shatkay}, url = {http://dx.doi.org/10.1021/pr900665y}, year = {2009}, date = {2009-01-01}, journal = {J. Proteome Res.}, volume = {8}, number = {11}, pages = {5363–5366}, abstract = {SherLoc2 is a comprehensive high-accuracy subcellular localization prediction system. It is applicable to animal, fungal, and plant proteins and covers all main eukaryotic subcellular locations. SherLoc2 integrates several sequence-based features as well as text-based features. In addition, we incorporate phylogenetic profiles and Gene Ontology (GO) terms derived from the protein sequence to considerably improve the prediction performance. SherLoc2 achieves an overall classification accuracy of up to 93% in five-fold cross-validation. A novel feature, DiaLoc, allows users to manually provide their current background knowledge by describing a protein in a short abstract which is then used to improve the prediction. SherLoc2 is available both as a free web service and as a stand-alone version at http://www-bs.informatik.uni-tuebingen.de/Services/SherLoc2 .}, keywords = {}, pubstate = {published}, tppubtype = {article} } SherLoc2 is a comprehensive high-accuracy subcellular localization prediction system. It is applicable to animal, fungal, and plant proteins and covers all main eukaryotic subcellular locations. SherLoc2 integrates several sequence-based features as well as text-based features. 
In addition, we incorporate phylogenetic profiles and Gene Ontology (GO) terms derived from the protein sequence to considerably improve the prediction performance. SherLoc2 achieves an overall classification accuracy of up to 93% in five-fold cross-validation. A novel feature, DiaLoc, allows users to manually provide their current background knowledge by describing a protein in a short abstract which is then used to improve the prediction. SherLoc2 is available both as a free web service and as a stand-alone version at http://www-bs.informatik.uni-tuebingen.de/Services/SherLoc2 . |
Ahrends, Robert; Lichtner, Björn; Bertsch, Andreas; Kohlbacher, Oliver; Trusch, Maria; Schlüter, Hartmut Application of displacement chromatography for the proteome analysis of a human plasma protein fraction J. Chromatogr. A, 1217 (19), pp. 3321-9, 2009. @article{Ahrends_DisplacementChrJChrA, title = {Application of displacement chromatography for the proteome analysis of a human plasma protein fraction}, author = {Robert Ahrends and Björn Lichtner and Andreas Bertsch and Oliver Kohlbacher and Maria Trusch and Hartmut Schlüter}, doi = {https://doi.org/10.1016/j.chroma.2009.10.028}, year = {2009}, date = {2009-01-01}, journal = {J. Chromatogr. A}, volume = {1217}, number = {19}, pages = {3321-9}, abstract = {It was the aim of this study to compare the performance of displacement chromatography with gradient elution chromatography both applied as the cation-exchange separation step for a proteome analysis in a bottom-up approach using multidimensional chromatography for the separation of tryptic peptides prior to their mass spectrometric analysis. The tryptic digest of the human Cohn fraction IV-4 served as a sample. For both chromatography modes commonly used operating parameters were chosen thus ensuring optimal separation results of equal sample amounts for each mode. All resulting fractions were analyzed with an HPLC-chip-LC-MS system. The eluate of the HPLC-chip column was ionized by electrospray ionization (ESI) and analyzed with an ion-trap mass spectrometer. For guaranteeing high confidence concerning the identity of the peptides, the mass spectrometric data were processed by different bioinformatic tools applying stringent criteria. By the displacement approach the total amount of identified proteins (78) was significantly higher than in the gradient mode (58). 
The results showed that displacement chromatography is a well suited alternative in comparison to gradient elution separation for analysis of proteomes via the bottom-up approach applying multidimensional chromatography, especially in those cases when larger quantities of proteins are available.}, keywords = {}, pubstate = {published}, tppubtype = {article} } It was the aim of this study to compare the performance of displacement chromatography with gradient elution chromatography both applied as the cation-exchange separation step for a proteome analysis in a bottom-up approach using multidimensional chromatography for the separation of tryptic peptides prior to their mass spectrometric analysis. The tryptic digest of the human Cohn fraction IV-4 served as a sample. For both chromatography modes commonly used operating parameters were chosen thus ensuring optimal separation results of equal sample amounts for each mode. All resulting fractions were analyzed with an HPLC-chip-LC-MS system. The eluate of the HPLC-chip column was ionized by electrospray ionization (ESI) and analyzed with an ion-trap mass spectrometer. For guaranteeing high confidence concerning the identity of the peptides, the mass spectrometric data were processed by different bioinformatic tools applying stringent criteria. By the displacement approach the total amount of identified proteins (78) was significantly higher than in the gradient mode (58). The results showed that displacement chromatography is a well suited alternative in comparison to gradient elution separation for analysis of proteomes via the bottom-up approach applying multidimensional chromatography, especially in those cases when larger quantities of proteins are available. |
Toussaint, Nora; Kohlbacher, Oliver; Rätsch, Gunnar Exploiting Physico-Chemical Properties in String-Kernels MLCB 2009 (accepted), 2009. (BibTeX) @inproceedings{inproceedingsreference.2009-11-01.2370655019, title = {Exploiting Physico-Chemical Properties in String-Kernels}, author = {Nora Toussaint and Oliver Kohlbacher and Gunnar Rätsch}, year = {2009}, date = {2009-01-01}, booktitle = {MLCB 2009 (accepted)}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
Kohlbacher, Oliver; Reinert, Knut OpenMS and TOPP: Open Source Software for LC-MS Data Analysis Hubbard, Simon J; Jones, Andrew R (Ed.): Proteome Bioinformatics, 604 , pp. 201-11, Humana Press, 2009. @incollection{MiMB-OpenMS_TOPP, title = {OpenMS and TOPP: Open Source Software for LC-MS Data Analysis}, author = {Oliver Kohlbacher and Knut Reinert}, editor = {Simon J Hubbard and Andrew R Jones}, doi = {https://doi.org/10.1007/978-1-60761-987-1_23}, year = {2009}, date = {2009-01-01}, booktitle = {Proteome Bioinformatics}, volume = {604}, pages = {201-11}, publisher = {Humana Press}, chapter = {14}, series = {Methods in Molecular Biology}, abstract = {The automatic analysis of mass spectrometry data is becoming more and more important since increasingly larger datasets are readily available that cannot be evaluated manually. This has triggered the development of several open-source software libraries for the automatic analysis of such data. Among those is OpenMS together with TOPP (The OpenMS Proteomics Pipeline). OpenMS is a C++ library for rapid prototyping of complex algorithms for the analysis of mass spectrometry data. Based on the OpenMS library, TOPP provides a collection of tools for the most important tasks in proteomics analysis. The tight coupling of OpenMS and TOPP makes it easy to extend TOPP by adding new tools to the OpenMS library. We describe the overall concepts behind the software and illustrate its use with several examples.}, keywords = {}, pubstate = {published}, tppubtype = {incollection} } The automatic analysis of mass spectrometry data is becoming more and more important since increasingly larger datasets are readily available that cannot be evaluated manually. This has triggered the development of several open-source software libraries for the automatic analysis of such data. Among those is OpenMS together with TOPP (The OpenMS Proteomics Pipeline). 
OpenMS is a C++ library for rapid prototyping of complex algorithms for the analysis of mass spectrometry data. Based on the OpenMS library, TOPP provides a collection of tools for the most important tasks in proteomics analysis. The tight coupling of OpenMS and TOPP makes it easy to extend TOPP by adding new tools to the OpenMS library. We describe the overall concepts behind the software and illustrate its use with several examples. |
Peifer, Christian; Urich, Robert; Schattel, Verena; Abdaleh, Mohammed; Röttig, Marc; Kohlbacher, Oliver; Laufer, Stefan Implications for selectivity of 3,4-diarylquinolinones as p38α MAP kinase inhibitors Bioorg. Med. Chem. Lett., 18 (4), pp. 1431-1435, 2008. @article{MAPKInhib, title = {Implications for selectivity of 3,4-diarylquinolinones as p38α MAP kinase inhibitors}, author = {Christian Peifer and Robert Urich and Verena Schattel and Mohammed Abdaleh and Marc Röttig and Oliver Kohlbacher and Stefan Laufer}, url = {http://dx.doi.org/10.1016/j.bmcl.2007.12.073}, year = {2008}, date = {2008-01-01}, journal = {Bioorg. Med. Chem. Lett.}, volume = {18}, number = {4}, pages = {1431-1435}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
Sturm, Marc; Bertsch, Andreas; Gröpl, Clemens; Hildebrandt, Andreas; Hussong, Rene; Lange, Eva; Pfeifer, Nico; Schulz-Trieglaff, Ole; Zerck, Alexandra; Reinert, Knut; Kohlbacher, Oliver OpenMS - An open-source software framework for mass spectrometry BMC Bioinformatics, 9 , pp. 163, 2008. @article{OpenMS, title = {OpenMS - An open-source software framework for mass spectrometry}, author = {Marc Sturm and Andreas Bertsch and Clemens Gröpl and Andreas Hildebrandt and Rene Hussong and Eva Lange and Nico Pfeifer and Ole Schulz-Trieglaff and Alexandra Zerck and Knut Reinert and Oliver Kohlbacher}, url = {http://www.biomedcentral.com/1471-2105/9/163}, year = {2008}, date = {2008-01-01}, journal = {BMC Bioinformatics}, volume = {9}, pages = {163}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
Feldhahn, Magdalena; Thiel, Philipp; Schuler, Mathias; Hillen, Nina; Stevanovic, Stefan; Rammensee, Hans-Georg; Kohlbacher, Oliver EpiToolKit - A web server for computational immunomics Nucleic Acids Res., 36 , pp. W519-22, 2008. @article{ETK, title = {EpiToolKit - A web server for computational immunomics}, author = {Magdalena Feldhahn and Philipp Thiel and Mathias Schuler and Nina Hillen and Stefan Stevanovic and Hans-Georg Rammensee and Oliver Kohlbacher}, url = {http://dx.doi.org/10.1093/nar/gkn229}, year = {2008}, date = {2008-01-01}, journal = {Nucleic Acids Res.}, volume = {36}, pages = {W519-22}, abstract = {Predicting the T-cell-mediated immune response is an important task in vaccine design and thus one of the key problems in computational immunomics. Various methods have been developed during the last decade and are available online. We present EpiToolKit, a web server that has been specifically designed to offer a problem-solving environment for computational immunomics. EpiToolKit offers a variety of different prediction methods for major histocompatibility complex class I and II ligands as well as minor histocompatibility antigens. These predictions are embedded in a user-friendly interface allowing refining, editing and constraining the searches conveniently. We illustrate the value of the approach with a set of novel tumor-associated peptides. EpiToolKit is available online at www.epitoolkit.org.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Predicting the T-cell-mediated immune response is an important task in vaccine design and thus one of the key problems in computational immunomics. Various methods have been developed during the last decade and are available online. We present EpiToolKit, a web server that has been specifically designed to offer a problem-solving environment for computational immunomics. 
EpiToolKit offers a variety of different prediction methods for major histocompatibility complex class I and II ligands as well as minor histocompatibility antigens. These predictions are embedded in a user-friendly interface allowing refining, editing and constraining the searches conveniently. We illustrate the value of the approach with a set of novel tumor-associated peptides. EpiToolKit is available online at www.epitoolkit.org. |
Blum, Torsten; Kohlbacher, Oliver Using atom mapping rules for an improved detection of relevant routes in weighted metabolic networks J. Comput. Biol., 15 (6), pp. 565-576, 2008. @article{AtomMapping, title = {Using atom mapping rules for an improved detection of relevant routes in weighted metabolic networks}, author = {Torsten Blum and Oliver Kohlbacher}, url = {http://dx.doi.org/10.1089/cmb.2008.0044}, year = {2008}, date = {2008-01-01}, journal = {J. Comput. Biol.}, volume = {15}, number = {6}, pages = {565-576}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
Kerzmann, Andreas; Fuhrmann, Jan; Kohlbacher, Oliver; Neumann, Dirk BALLDock/SLICK: A new Method for Protein-Carbohydrate Docking J. Chem. Inf. Model., 48 (8), pp. 1616-1625, 2008. @article{ArticleReference.2008-06-12.8693763500, title = {BALLDock/SLICK: A new Method for Protein-Carbohydrate Docking}, author = {Andreas Kerzmann and Jan Fuhrmann and Oliver Kohlbacher and Dirk Neumann}, url = {http://dx.doi.org/10.1021/ci800103u}, year = {2008}, date = {2008-01-01}, journal = {J. Chem. Inf. Model.}, volume = {48}, number = {8}, pages = {1616-1625}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
Schultheiss, Sebastian J; Busch, Wolfgang; Lohmann, Jan U; Kohlbacher, Oliver; Rätsch, Gunnar KIRMES: Kernel-based Identification of Regulatory Modules in Euchromatic Sequences Beyer, A; Schroeder, M (Ed.): Proc. German Conference Bioinformatics (GCB 2008), pp. 158-167, GI, 2008. @inproceedings{InproceedingsReference.2008-06-24.2988332664, title = {KIRMES: Kernel-based Identification of Regulatory Modules in Euchromatic Sequences}, author = {Sebastian J Schultheiss and Wolfgang Busch and Jan U Lohmann and Oliver Kohlbacher and Gunnar Rätsch}, editor = {A Beyer and M Schroeder}, url = {http://www.fml.tuebingen.mpg.de/raetsch/projects/kirmes}, year = {2008}, date = {2008-01-01}, booktitle = {Proc. German Conference Bioinformatics (GCB 2008)}, pages = {158-167}, publisher = {GI}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
Borgatti, Stephen; Kobourov, Stephen; Kohlbacher, Oliver; Mutzel, Petra 08191 Abstracts Collection -- Graph Drawing with Applications to Bioinformatics and Social Sciences Borgatti, Stephen P; Kobourov, Stephen; Kohlbacher, Oliver; Mutzel, Petra (Ed.): Graph Drawing with Applications to Bioinformatics and Social Sciences, Schloss Dagstuhl - Leibniz-Zentrum fuer Informatik, Germany, Dagstuhl, Germany, 2008. @inproceedings{borgatti_et_alDSP20081554, title = {08191 Abstracts Collection -- Graph Drawing with Applications to Bioinformatics and Social Sciences}, author = {Stephen Borgatti and Stephen Kobourov and Oliver Kohlbacher and Petra Mutzel}, editor = {Stephen P Borgatti and Stephen Kobourov and Oliver Kohlbacher and Petra Mutzel}, url = {http://drops.dagstuhl.de/opus/volltexte/2008/1554}, year = {2008}, date = {2008-01-01}, booktitle = {Graph Drawing with Applications to Bioinformatics and Social Sciences}, publisher = {Schloss Dagstuhl - Leibniz-Zentrum fuer Informatik, Germany}, address = {Dagstuhl, Germany}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
Borgatti, Stephen; Kobourov, Stephen; Kohlbacher, Oliver; Mutzel, Petra 08191 Executive Summary -- Graph Drawing with Applications to Bioinformatics and Social Sciences Borgatti, Stephen P; Kobourov, Stephen; Kohlbacher, Oliver; Mutzel, Petra (Ed.): Graph Drawing with Applications to Bioinformatics and Social Sciences, Schloss Dagstuhl - Leibniz-Zentrum fuer Informatik, Germany, Dagstuhl, Germany, 2008. @inproceedings{borgatti_et_alDSP20081552, title = {08191 Executive Summary -- Graph Drawing with Applications to Bioinformatics and Social Sciences}, author = {Stephen Borgatti and Stephen Kobourov and Oliver Kohlbacher and Petra Mutzel}, editor = {Stephen P Borgatti and Stephen Kobourov and Oliver Kohlbacher and Petra Mutzel}, url = {http://drops.dagstuhl.de/opus/volltexte/2008/1552}, year = {2008}, date = {2008-01-01}, booktitle = {Graph Drawing with Applications to Bioinformatics and Social Sciences}, publisher = {Schloss Dagstuhl - Leibniz-Zentrum fuer Informatik, Germany}, address = {Dagstuhl, Germany}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
Albrecht, Mario; Estrella-Balderrama, Alejandro; Geyer, Markus; Gutwenger, Carsten; Klein, Karsten; Kohlbacher, Oliver; Schulz, Michael 08191 Working Group Summary -- Visually Comparing a Set of Graphs Borgatti, Stephen P; Kobourov, Stephen; Kohlbacher, Oliver; Mutzel, Petra (Ed.): Graph Drawing with Applications to Bioinformatics and Social Sciences, Schloss Dagstuhl - Leibniz-Zentrum fuer Informatik, Germany, Dagstuhl, Germany, 2008. @inproceedings{albrecht_et_alDSP20081553, title = {08191 Working Group Summary -- Visually Comparing a Set of Graphs}, author = {Mario Albrecht and Alejandro Estrella-Balderrama and Markus Geyer and Carsten Gutwenger and Karsten Klein and Oliver Kohlbacher and Michael Schulz}, editor = {Stephen P Borgatti and Stephen Kobourov and Oliver Kohlbacher and Petra Mutzel}, url = {http://drops.dagstuhl.de/opus/volltexte/2008/1553}, year = {2008}, date = {2008-01-01}, booktitle = {Graph Drawing with Applications to Bioinformatics and Social Sciences}, publisher = {Schloss Dagstuhl - Leibniz-Zentrum fuer Informatik, Germany}, address = {Dagstuhl, Germany}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
Pfeifer, Nico; Kohlbacher, Oliver Multiple Instance Learning Allows MHC Class II Epitope Predictions across Alleles Crandall, K A; Lagergren, J (Ed.): Proceedings of the 8th Workshop on Algorithms in Bioinformatics (WABI 2008), Lecture Notes in Bioinformatics vol. 5251, pp. 210-221, Springer, 2008. @inproceedings{MHCIIMulti, title = {Multiple Instance Learning Allows MHC Class II Epitope Predictions across Alleles}, author = {Nico Pfeifer and Oliver Kohlbacher}, editor = {K A Crandall and J Lagergren}, url = {http://dx.doi.org/10.1007/978-3-540-87361-7_18}, year = {2008}, date = {2008-01-01}, booktitle = {Proceedings of the 8th Workshop on Algorithms in Bioinformatics (WABI 2008), Lecture Notes in Bioinformatics vol. 5251}, pages = {210-221}, publisher = {Springer}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
Zhang, Fajun; Skoda, Maximilian W A; Jacobs, Robert M J; Zorn, S; Martin, R A; Martin, C M; Clark, G F; Weggler, Sophie; Hildebrandt, Andreas; Kohlbacher, Oliver; Schreiber, Frank Reentrant Condensation of Proteins in Solution Induced by Multivalent Counterions Phys. Rev. Lett., 101 (14), pp. 148101, 2008. @article{ArticleReference.2008-08-25.4730867284, title = {Reentrant Condensation of Proteins in Solution Induced by Multivalent Counterions}, author = {Fajun Zhang and Maximilian W A Skoda and Robert M J Jacobs and S Zorn and R A Martin and C M Martin and G F Clark and Sophie Weggler and Andreas Hildebrandt and Oliver Kohlbacher and Frank Schreiber}, url = {http://link.aps.org/abstract/PRL/v101/e148101}, year = {2008}, date = {2008-01-01}, journal = {Phys. Rev. Lett.}, volume = {101}, number = {14}, pages = {148101}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
Schulz-Trieglaff, Ole; Pfeifer, Nico; Gröpl, Clemens; Kohlbacher, Oliver; Reinert, Knut LC-MSsim - a simulation software for Liquid Chromatography-Mass Spectrometry data BMC Bioinformatics, 9 , pp. 423, 2008. @article{LCMSSim, title = {LC-MSsim - a simulation software for Liquid Chromatography-Mass Spectrometry data}, author = {Ole Schulz-Trieglaff and Nico Pfeifer and Clemens Gröpl and Oliver Kohlbacher and Knut Reinert}, url = {http://www.biomedcentral.com/1471-2105/9/423}, year = {2008}, date = {2008-01-01}, journal = {BMC Bioinformatics}, volume = {9}, pages = {423}, abstract = {Background Mass Spectrometry coupled to Liquid Chromatography (LC-MS) is commonly used to analyze the protein content of biological samples in large scale studies. The data resulting from an LC-MS experiment is huge, highly complex and noisy. Accordingly, it has sparked new developments in Bioinformatics, especially in the fields of algorithm development, statistics and software engineering. In a quantitative label-free mass spectrometry experiment, crucial steps are the detection of peptide features in the mass spectra and the alignment of samples by correcting for shifts in retention time. At the moment, it is difficult to compare the plethora of algorithms for these tasks. So far, curated benchmark data exists only for peptide identification algorithms but no data that represents a ground truth for the evaluation of feature detection, alignment and filtering algorithms. Results We present LC-MSsim, a simulation software for LC-ESI-MS experiments. It simulates ESI spectra on the MS level. It reads a list of proteins from a FASTA file and digests the protein mixture using a user-defined enzyme. The software creates an LC-MS data set using a predictor for the retention time of the peptides and a model for peak shapes and elution profiles of the mass spectral peaks. 
Our software also offers the possibility to add contaminants, to change the background noise level and includes a model for the detectability of peptides in mass spectra. After the simulation, LC-MSsim writes the simulated data to mzData, a public XML format. The software also stores the positions (monoisotopic m/z and retention time) and ion counts of the simulated ions in separate files. Conclusion LC-MSsim generates simulated LC-MS data sets and incorporates models for peak shapes and contaminations. Algorithm developers can match the results of feature detection and alignment algorithms against the simulated ion lists and meaningful error rates can be computed. We anticipate that LC-MSsim will be useful to the wider community to perform benchmark studies and comparisons between computational tools.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Background Mass Spectrometry coupled to Liquid Chromatography (LC-MS) is commonly used to analyze the protein content of biological samples in large scale studies. The data resulting from an LC-MS experiment is huge, highly complex and noisy. Accordingly, it has sparked new developments in Bioinformatics, especially in the fields of algorithm development, statistics and software engineering. In a quantitative label-free mass spectrometry experiment, crucial steps are the detection of peptide features in the mass spectra and the alignment of samples by correcting for shifts in retention time. At the moment, it is difficult to compare the plethora of algorithms for these tasks. So far, curated benchmark data exists only for peptide identification algorithms but no data that represents a ground truth for the evaluation of feature detection, alignment and filtering algorithms. Results We present LC-MSsim, a simulation software for LC-ESI-MS experiments. It simulates ESI spectra on the MS level. It reads a list of proteins from a FASTA file and digests the protein mixture using a user-defined enzyme. 
The software creates an LC-MS data set using a predictor for the retention time of the peptides and a model for peak shapes and elution profiles of the mass spectral peaks. Our software also offers the possibility to add contaminants, to change the background noise level and includes a model for the detectability of peptides in mass spectra. After the simulation, LC-MSsim writes the simulated data to mzData, a public XML format. The software also stores the positions (monoisotopic m/z and retention time) and ion counts of the simulated ions in separate files. Conclusion LC-MSsim generates simulated LC-MS data sets and incorporates models for peak shapes and contaminations. Algorithm developers can match the results of feature detection and alignment algorithms against the simulated ion lists and meaningful error rates can be computed. We anticipate that LC-MSsim will be useful to the wider community to perform benchmark studies and comparisons between computational tools. |
Toussaint, Nora C; Dönnes, Pierre; Kohlbacher, Oliver A Mathematical Framework for the Selection of an Optimal Set of Peptides for Epitope-based Vaccines PLoS Comput. Biol., 4 (12), pp. e1000246, 2008. @article{OptVacDes, title = {A Mathematical Framework for the Selection of an Optimal Set of Peptides for Epitope-based Vaccines}, author = {Nora C Toussaint and Pierre Dönnes and Oliver Kohlbacher}, url = {http://www.ploscompbiol.org/article/info%3Adoi%2F10.1371%2Fjournal.pcbi.1000246}, year = {2008}, date = {2008-01-01}, journal = {PLoS Comput. Biol.}, volume = {4}, number = {12}, pages = {e1000246}, keywords = {}, pubstate = {published}, tppubtype = {article} } |