Publications
Novák, Jirí; Sachsenberg, Timo; Hoksza, David; Skopal, Tomás; Kohlbacher, Oliver A Statistical Comparison of SimTandem with State-of-the-Art Peptide Identification Tools 7th International Conference on Practical Applications of Computational Biology & Bioinformatics, pp. 101–109, Springer 2013. @inproceedings{novak2013statistical,
  title        = {A Statistical Comparison of {SimTandem} with State-of-the-Art Peptide Identification Tools},
  author       = {Novák, Jirí and Sachsenberg, Timo and Hoksza, David and Skopal, Tomás and Kohlbacher, Oliver},
  url          = {https://link.springer.com/chapter/10.1007/978-3-319-00578-2_14},
  year         = {2013},
  date         = {2013-01-01},
  booktitle    = {7th International Conference on Practical Applications of Computational Biology \& Bioinformatics},
  pages        = {101--109},
  organization = {Springer},
  pubstate     = {published},
  tppubtype    = {inproceedings}
} |
Gifford, Casey A; Ziller, Michael J; Gu, Hongcang; Trapnell, Cole; Donaghey, Julie; Tsankov, Alexander; Shalek, Alex K; Shishkin, Alexander A; Issner, Robbyn; Zhang, Xiaolan; Fostel, Jennifer L; Holmes, Laurie; Meldrim, Jim; Guttman, Mitchell; Epstein, Charles; Park, Hongkun; Kohlbacher, Oliver; Rinn, John; Gnirke, Andreas; Lander, Eric S; Bernstein, Bradley E; Meissner, Alexander Transcriptional and Epigenetic Dynamics during Specification of Human Embryonic Stem Cells Cell, 153 (5), pp. 1149-1163, 2013. @article{GIffordCell2013,
  title     = {Transcriptional and Epigenetic Dynamics during Specification of Human Embryonic Stem Cells},
  author    = {Gifford, Casey A and Ziller, Michael J and Gu, Hongcang and Trapnell, Cole and Donaghey, Julie and Tsankov, Alexander and Shalek, Alex K and Shishkin, Alexander A and Issner, Robbyn and Zhang, Xiaolan and Fostel, Jennifer L and Holmes, Laurie and Meldrim, Jim and Guttman, Mitchell and Epstein, Charles and Park, Hongkun and Kohlbacher, Oliver and Rinn, John and Gnirke, Andreas and Lander, Eric S and Bernstein, Bradley E and Meissner, Alexander},
  doi       = {10.1016/j.cell.2013.04.037},
  year      = {2013},
  date      = {2013-01-01},
  journal   = {Cell},
  volume    = {153},
  number    = {5},
  pages     = {1149--1163},
  abstract  = {Differentiation of human embryonic stem cells (hESCs) provides a unique opportunity to study the regulatory mechanisms that facilitate cellular transitions in a human context. To that end, we performed comprehensive transcriptional and epigenetic profiling of populations derived through directed differentiation of hESCs representing each of the three embryonic germ layers. Integration of whole-genome bisulfite sequencing, chromatin immunoprecipitation sequencing, and RNA sequencing reveals unique events associated with specification toward each lineage. Dynamic alterations in DNA methylation and H3K4me1 are evident at putative distal regulatory elements bound by pluripotency factors or activated in specific lineages. In addition, we identified germ-layer-specific H3K27me3 enrichment at sites exhibiting high DNA methylation in the undifferentiated state. A better understanding of these initial specification events will facilitate identification of deficiencies in current approaches, leading to more faithful differentiation strategies as well as providing insights into the rewiring of human regulatory programs during cellular transitions.},
  pubstate  = {published},
  tppubtype = {article}
} Differentiation of human embryonic stem cells (hESCs) provides a unique opportunity to study the regulatory mechanisms that facilitate cellular transitions in a human context. To that end, we performed comprehensive transcriptional and epigenetic profiling of populations derived through directed differentiation of hESCs representing each of the three embryonic germ layers. Integration of whole-genome bisulfite sequencing, chromatin immunoprecipitation sequencing, and RNA sequencing reveals unique events associated with specification toward each lineage. Dynamic alterations in DNA methylation and H3K4me1 are evident at putative distal regulatory elements bound by pluripotency factors or activated in specific lineages. In addition, we identified germ-layer-specific H3K27me3 enrichment at sites exhibiting high DNA methylation in the undifferentiated state. A better understanding of these initial specification events will facilitate identification of deficiencies in current approaches, leading to more faithful differentiation strategies as well as providing insights into the rewiring of human regulatory programs during cellular transitions. |
Ziller, Michael J; Gu, Hongcang; Müller, Fabian; Donaghey, Julian; Kohlbacher, Oliver; Bernstein, Bradley E; Gnirke, Andreas; Meissner, Alexander Charting a dynamic DNA methylation landscape of the human genome Nature, 500 , pp. 477–481, 2013. @article{Ziller_MethylationLandscape_Nature_2013,
  title     = {Charting a dynamic {DNA} methylation landscape of the human genome},
  author    = {Ziller, Michael J and Gu, Hongcang and Müller, Fabian and Donaghey, Julian and Kohlbacher, Oliver and Bernstein, Bradley E and Gnirke, Andreas and Meissner, Alexander},
  doi       = {10.1038/nature12433},
  year      = {2013},
  date      = {2013-01-01},
  journal   = {Nature},
  volume    = {500},
  pages     = {477--481},
  abstract  = {DNA methylation is a defining feature of mammalian cellular identity and is essential for normal development. Most cell types, except germ cells and pre-implantation embryos, display relatively stable DNA methylation patterns, with 70–80% of all CpGs being methylated. Despite recent advances, we still have a limited understanding of when, where and how many CpGs participate in genomic regulation. Here we report the in-depth analysis of 42 whole-genome bisulphite sequencing data sets across 30 diverse human cell and tissue types. We observe dynamic regulation for only 21.8% of autosomal CpGs within a normal developmental context, most of which are distal to transcription start sites. These dynamic CpGs co-localize with gene regulatory elements, particularly enhancers and transcription-factor-binding sites, which allow identification of key lineage-specific regulators. In addition, differentially methylated regions (DMRs) often contain single nucleotide polymorphisms associated with cell-type-related diseases as determined by genome-wide association studies. The results also highlight the general inefficiency of whole-genome bisulphite sequencing, as 70–80% of the sequencing reads across these data sets provided little or no relevant information about CpG methylation. To demonstrate further the utility of our DMR set, we use it to classify unknown samples and identify representative signature regions that recapitulate major DNA methylation dynamics. In summary, although in theory every CpG can change its methylation state, our results suggest that only a fraction does so as part of coordinated regulatory programs. Therefore, our selected DMRs can serve as a starting point to guide new, more effective reduced representation approaches to capture the most informative fraction of CpGs, as well as further pinpoint putative regulatory elements.},
  pubstate  = {published},
  tppubtype = {article}
} DNA methylation is a defining feature of mammalian cellular identity and is essential for normal development. Most cell types, except germ cells and pre-implantation embryos, display relatively stable DNA methylation patterns, with 70–80% of all CpGs being methylated. Despite recent advances, we still have a limited understanding of when, where and how many CpGs participate in genomic regulation. Here we report the in-depth analysis of 42 whole-genome bisulphite sequencing data sets across 30 diverse human cell and tissue types. We observe dynamic regulation for only 21.8% of autosomal CpGs within a normal developmental context, most of which are distal to transcription start sites. These dynamic CpGs co-localize with gene regulatory elements, particularly enhancers and transcription-factor-binding sites, which allow identification of key lineage-specific regulators. In addition, differentially methylated regions (DMRs) often contain single nucleotide polymorphisms associated with cell-type-related diseases as determined by genome-wide association studies. The results also highlight the general inefficiency of whole-genome bisulphite sequencing, as 70–80% of the sequencing reads across these data sets provided little or no relevant information about CpG methylation. 
To demonstrate further the utility of our DMR set, we use it to classify unknown samples and identify representative signature regions that recapitulate major DNA methylation dynamics. In summary, although in theory every CpG can change its methylation state, our results suggest that only a fraction does so as part of coordinated regulatory programs. Therefore, our selected DMRs can serve as a starting point to guide new, more effective reduced representation approaches to capture the most informative fraction of CpGs, as well as further pinpoint putative regulatory elements. |
Kyzirakos, C; Pflueckhahn, U; Sturm, M; Schroeder, C; Bauer, P; Walter, M; Feldhahn, M; Walzer, M; Mohr, C; Szolek, A; Bonin, M; Kohlbacher, O; Ebinger, M; Handgretinger, R; Rammensee, H -G; Lang, P iVacALL: utilizing next-generation sequencing for the establishment of an individual peptide vaccination approach for paediatric acute lymphoblastic leukaemia Bone Marrow Transplant., 48 (3), pp. S401, 2013, (39th Annual Meeting of the European-Group-for-Blood-and-Marrow-Transplantation (EBMT), London, ENGLAND, APR 07-10, 2013). @article{iVacALL2013,
  title     = {{iVacALL}: utilizing next-generation sequencing for the establishment of an individual peptide vaccination approach for paediatric acute lymphoblastic leukaemia},
  author    = {Kyzirakos, C. and Pflueckhahn, U. and Sturm, M. and Schroeder, C. and Bauer, P. and Walter, M. and Feldhahn, M. and Walzer, M. and Mohr, C. and Szolek, A. and Bonin, M. and Kohlbacher, O. and Ebinger, M. and Handgretinger, R. and Rammensee, H.-G. and Lang, P.},
  doi       = {10.1038/bmt.2013.23},
  year      = {2013},
  date      = {2013-01-01},
  journal   = {Bone Marrow Transplant.},
  volume    = {48},
  number    = {3},
  pages     = {S401},
  note      = {39th Annual Meeting of the European-Group-for-Blood-and-Marrow-Transplantation (EBMT), London, ENGLAND, APR 07-10, 2013},
  pubstate  = {published},
  tppubtype = {article}
} |
Thiel, Philipp; Röglin, Lars; Meissner, Nicole; Hennig, Sven; Kohlbacher, Oliver; Ottmann, Christian Virtual Screening and Experimental Validation Reveal Novel Small-Molecule Inhibitors of 14-3-3 Protein-Protein Interactions Chem. Commun., 49 (76), pp. 8468-70, 2013. @article{1433-CC,
  title     = {Virtual Screening and Experimental Validation Reveal Novel Small-Molecule Inhibitors of 14-3-3 Protein-Protein Interactions},
  author    = {Thiel, Philipp and Röglin, Lars and Meissner, Nicole and Hennig, Sven and Kohlbacher, Oliver and Ottmann, Christian},
  doi       = {10.1039/c3cc44612c},
  year      = {2013},
  date      = {2013-01-01},
  journal   = {Chem. Commun.},
  volume    = {49},
  number    = {76},
  pages     = {8468--8470},
  abstract  = {We report first non-covalent and exclusively extracellular inhibitors of 14-3-3 protein-protein interactions identified by virtual screening. Optimization by crystal structure analysis and in vitro binding assays yielded compounds capable of disrupting the interaction of 14-3-3σ with aminopeptidase N in a cellular assay.},
  pubstate  = {published},
  tppubtype = {article}
} We report first non-covalent and exclusively extracellular inhibitors of 14-3-3 protein-protein interactions identified by virtual screening. Optimization by crystal structure analysis and in vitro binding assays yielded compounds capable of disrupting the interaction of 14-3-3σ with aminopeptidase N in a cellular assay. |
Novak, Jiri; Sachsenberg, Timo; Hoksza, David; Skopal, Tomas; Kohlbacher, Oliver On Comparison of SimTandem with State-of-the-Art Peptide Identification Tools, Efficiency of Precursor Mass Filter and Dealing with Variable Modifications J. Integrative Bioinformatics, 10 (3), pp. 228, 2013. @article{JIB2013,
  title     = {On Comparison of {SimTandem} with State-of-the-Art Peptide Identification Tools, Efficiency of Precursor Mass Filter and Dealing with Variable Modifications},
  author    = {Novak, Jiri and Sachsenberg, Timo and Hoksza, David and Skopal, Tomas and Kohlbacher, Oliver},
  doi       = {10.2390/biecoll-jib-2013-228},
  year      = {2013},
  date      = {2013-01-01},
  journal   = {J. Integrative Bioinformatics},
  volume    = {10},
  number    = {3},
  pages     = {228},
  abstract  = {The similarity search in theoretical mass spectra generated from protein sequence databases is a widely accepted approach for identification of peptides from query mass spectra produced by shotgun proteomics. Growing protein sequence databases and noisy query spectra demand database indexing techniques and better similarity measures for the comparison of theoretical spectra against query spectra. We employ a modification of previously proposed parameterized Hausdorff distance for comparisons of mass spectra. The new distance outperforms the original distance, the angle distance and state-of-the-art peptide identification tools OMSSA and X!Tandem in the number of identified peptides even though the q-value is only 0.001. When a precursor mass filter is used as a database indexing technique, our method outperforms OMSSA in the speed of search. When variable modifications are not searched, the search time is similar to X!Tandem. We show that the precursor mass filter is an efficient database indexing technique for high-accuracy data even though many variable modifications are being searched. We demonstrate that the number of identified peptides is bigger when variable modifications are searched separately by more search runs of a peptide identification engine. Otherwise, the false discovery rates are affected by mixing unmodified and modified spectra together resulting in a lower number of identified peptides. Our method is implemented in the freely available application SimTandem which can be used in the framework TOPP based on OpenMS.},
  pubstate  = {published},
  tppubtype = {article}
} The similarity search in theoretical mass spectra generated from protein sequence databases is a widely accepted approach for identification of peptides from query mass spectra produced by shotgun proteomics. Growing protein sequence databases and noisy query spectra demand database indexing techniques and better similarity measures for the comparison of theoretical spectra against query spectra. We employ a modification of previously proposed parameterized Hausdorff distance for comparisons of mass spectra. The new distance outperforms the original distance, the angle distance and state-of-the-art peptide identification tools OMSSA and X!Tandem in the number of identified peptides even though the q-value is only 0.001. When a precursor mass filter is used as a database indexing technique, our method outperforms OMSSA in the speed of search. When variable modifications are not searched, the search time is similar to X!Tandem. We show that the precursor mass filter is an efficient database indexing technique for high-accuracy data even though many variable modifications are being searched. We demonstrate that the number of identified peptides is bigger when variable modifications are searched separately by more search runs of a peptide identification engine. Otherwise, the false discovery rates are affected by mixing unmodified and modified spectra together resulting in a lower number of identified peptides. 
Our method is implemented in the freely available application SimTandem which can be used in the framework TOPP based on OpenMS. |
Schubert, Benjamin; Lund, Ole; Nielsen, Morten Evaluation of peptide selection approaches for epitope-based vaccine design Tissue Antigens, 82 (4), pp. 243–251, 2013. @article{24461003.2016-04-07.9132271394,
  title     = {Evaluation of peptide selection approaches for epitope-based vaccine design},
  author    = {Schubert, Benjamin and Lund, Ole and Nielsen, Morten},
  url       = {http://onlinelibrary.wiley.com/doi/10.1111/tan.12199/full},
  year      = {2013},
  date      = {2013-01-01},
  journal   = {Tissue Antigens},
  volume    = {82},
  number    = {4},
  pages     = {243--251},
  abstract  = {A major challenge in epitope-based vaccine (EV) design stems from the vast genomic variation of pathogens and the diversity of the host cellular immune system. Several computational approaches have been published to assist the selection of potential T cell epitopes for EV design. So far, no thorough comparison between the current methods has been realized. Using human immunodeficiency virus as test case, different EV selection algorithms were evaluated with respect to their ability to select small peptides sets with broad coverage of allelic and pathogenic diversity. The methods were compared in terms of in silico measurements simulating important vaccine properties like the ability of inducing protection against a multivariant pathogen in a population; the predicted immunogenicity; pathogen, allele, and population coverage; as well as the conservation of selected epitopes. Additionally, we evaluate the use of human leukocyte antigen (HLA) supertypes with regards to their applicability for population-spanning vaccine design. The results showed that in terms of induced protection methods that simultaneously aim to optimize pathogen and HLA coverage significantly outperform methods focusing on pathogen coverage alone. Moreover, supertype-based approaches for coverage of HLA diversity were showed to yield only satisfying results in populations in which the supertype representatives are prevalent.},
  pubstate  = {published},
  tppubtype = {article}
} A major challenge in epitope-based vaccine (EV) design stems from the vast genomic variation of pathogens and the diversity of the host cellular immune system. Several computational approaches have been published to assist the selection of potential T cell epitopes for EV design. So far, no thorough comparison between the current methods has been realized. Using human immunodeficiency virus as test case, different EV selection algorithms were evaluated with respect to their ability to select small peptides sets with broad coverage of allelic and pathogenic diversity. The methods were compared in terms of in silico measurements simulating important vaccine properties like the ability of inducing protection against a multivariant pathogen in a population; the predicted immunogenicity; pathogen, allele, and population coverage; as well as the conservation of selected epitopes. Additionally, we evaluate the use of human leukocyte antigen (HLA) supertypes with regards to their applicability for population-spanning vaccine design. The results showed that in terms of induced protection methods that simultaneously aim to optimize pathogen and HLA coverage significantly outperform methods focusing on pathogen coverage alone. Moreover, supertype-based approaches for coverage of HLA diversity were showed to yield only satisfying results in populations in which the supertype representatives are prevalent. |
Michta, Ewelina; Schad, Klaus; Blin, Kai; Ort-Winklbauer, Regina; Röttig, Marc; Kohlbacher, Oliver; Wohlleben, Wolfgang; Schinko, Eva; Mast, Yvonne The bifunctional role of aconitase in Streptomyces viridochromogenes Tü494 Env. Microbiol., 2012. @article{Aconitase,
  title     = {The bifunctional role of aconitase in {Streptomyces viridochromogenes Tü494}},
  author    = {Michta, Ewelina and Schad, Klaus and Blin, Kai and Ort-Winklbauer, Regina and Röttig, Marc and Kohlbacher, Oliver and Wohlleben, Wolfgang and Schinko, Eva and Mast, Yvonne},
  doi       = {10.1111/1462-2920.12006},
  year      = {2012},
  date      = {2012-01-01},
  journal   = {Env. Microbiol.},
  abstract  = {In many organisms, aconitases have dual functions; they serve as enzymes in the tricarboxylic acid cycle and as regulators of iron metabolism. In this study we defined the role of the aconitase AcnA in Streptomyces viridochromogenes Tü494, the producer of the herbicide phosphinothricyl-alanyl-alanine, also known as phosphinothricin tripeptide or bialaphos. A mutant in which the aconitase gene acnA was disrupted showed severe defects in morphology and physiology, as it was unable to form any aerial mycelium, spores nor phosphinothricin tripeptide. AcnA belongs to the iron regulatory proteins (IRPs). In addition to its catalytic function, AcnA plays a regulatory role by binding to iron responsive elements (IREs) located on the untranslated region of certain mRNAs. A mutation preventing the formation of the [4Fe-4S] cluster of AcnA eliminated its catalytic activity, but did not inhibit RNA-binding ability. In silico analysis of the S. viridochromogenes genome revealed several IRE-like structures. One structure is located upstream of recA, which is involved in the bacterial SOS response, and another one was identified upstream of ftsZ, which is required for the onset of sporulation in streptomycetes. The functionality of different IRE structures was proven with gel shift assays and specific IRE consensus sequences were defined. Furthermore, RecA was shown to be up-regulated on posttranscriptional level under oxidative stress conditions in the wild-type strain but not in the acnA mutant, suggesting a regulatory role of AcnA in oxidative stress response.},
  pubstate  = {published},
  tppubtype = {article}
} In many organisms, aconitases have dual functions; they serve as enzymes in the tricarboxylic acid cycle and as regulators of iron metabolism. In this study we defined the role of the aconitase AcnA in Streptomyces viridochromogenes Tü494, the producer of the herbicide phosphinothricyl-alanyl-alanine, also known as phosphinothricin tripeptide or bialaphos. A mutant in which the aconitase gene acnA was disrupted showed severe defects in morphology and physiology, as it was unable to form any aerial mycelium, spores nor phosphinothricin tripeptide. AcnA belongs to the iron regulatory proteins (IRPs). In addition to its catalytic function, AcnA plays a regulatory role by binding to iron responsive elements (IREs) located on the untranslated region of certain mRNAs. A mutation preventing the formation of the [4Fe-4S] cluster of AcnA eliminated its catalytic activity, but did not inhibit RNA-binding ability. In silico analysis of the S. viridochromogenes genome revealed several IRE-like structures. One structure is located upstream of recA, which is involved in the bacterial SOS response, and another one was identified upstream of ftsZ, which is required for the onset of sporulation in streptomycetes. The functionality of different IRE structures was proven with gel shift assays and specific IRE consensus sequences were defined. Furthermore, RecA was shown to be up-regulated on posttranscriptional level under oxidative stress conditions in the wild-type strain but not in the acnA mutant, suggesting a regulatory role of AcnA in oxidative stress response. |
Briesemeister, Sebastian; Rahnenführer, Jörg; Kohlbacher, Oliver No longer confidential: Estimating the Confidence of Individual Regression Predictions PLoS One, 7 (11), pp. e48723, 2012. @article{ConfyPLOS1,
  title     = {No longer confidential: Estimating the Confidence of Individual Regression Predictions},
  author    = {Briesemeister, Sebastian and Rahnenführer, Jörg and Kohlbacher, Oliver},
  doi       = {10.1371/journal.pone.0048723},
  year      = {2012},
  date      = {2012-01-01},
  journal   = {PLoS One},
  volume    = {7},
  number    = {11},
  pages     = {e48723},
  abstract  = {Quantitative predictions in computational life sciences are often based on regression models. The advent of machine learning has led to highly accurate regression models that have gained widespread acceptance. While there are statistical methods available to estimate the global performance of regression models on a test or training dataset, it is often not clear how well this performance transfers to other datasets or how reliable an individual prediction is–a fact that often reduces a user’s trust into a computational method. In analogy to the concept of an experimental error, we sketch how estimators for individual prediction errors can be used to provide confidence intervals for individual predictions. Two novel statistical methods, named CONFINE and CONFIVE, can estimate the reliability of an individual prediction based on the local properties of nearby training data. The methods can be applied equally to linear and non-linear regression methods with very little computational overhead. We compare our confidence estimators with other existing confidence and applicability domain estimators on two biologically relevant problems (MHC–peptide binding prediction and quantitative structure-activity relationship (QSAR)). Our results suggest that the proposed confidence estimators perform comparable to or better than previously proposed estimation methods. Given a sufficient amount of training data, the estimators exhibit error estimates of high quality. In addition, we observed that the quality of estimated confidence intervals is predictable. We discuss how confidence estimation is influenced by noise, the number of features, and the dataset size. Estimating the confidence in individual prediction in terms of error intervals represents an important step from plain, non-informative predictions towards transparent and interpretable predictions that will help to improve the acceptance of computational methods in the biological community.},
  pubstate  = {published},
  tppubtype = {article}
} Quantitative predictions in computational life sciences are often based on regression models. The advent of machine learning has led to highly accurate regression models that have gained widespread acceptance. While there are statistical methods available to estimate the global performance of regression models on a test or training dataset, it is often not clear how well this performance transfers to other datasets or how reliable an individual prediction is–a fact that often reduces a user’s trust into a computational method. In analogy to the concept of an experimental error, we sketch how estimators for individual prediction errors can be used to provide confidence intervals for individual predictions. Two novel statistical methods, named CONFINE and CONFIVE, can estimate the reliability of an individual prediction based on the local properties of nearby training data. The methods can be applied equally to linear and non-linear regression methods with very little computational overhead. We compare our confidence estimators with other existing confidence and applicability domain estimators on two biologically relevant problems (MHC–peptide binding prediction and quantitative structure-activity relationship (QSAR)). 
Our results suggest that the proposed confidence estimators perform comparable to or better than previously proposed estimation methods. Given a sufficient amount of training data, the estimators exhibit error estimates of high quality. In addition, we observed that the quality of estimated confidence intervals is predictable. We discuss how confidence estimation is influenced by noise, the number of features, and the dataset size. Estimating the confidence in individual prediction in terms of error intervals represents an important step from plain, non-informative predictions towards transparent and interpretable predictions that will help to improve the acceptance of computational methods in the biological community. |
Kenar, Erhan; Franken, Holger; Rosenbaum, Lars; Lehmann, Rainer; Forcisi, S; Wörmann, Kilian; Lucio, M; König, Andre; Rahnenführer, Jörg; Schmidt-Kopplin, Philippe; Haering, Hans-Ulrich; Zell, Andreas; Kohlbacher, Oliver Mit Bioinformatik zu Biomarkern Med. Welt, 63 (5), pp. 245-50, 2012. @article{MedWeltBiomarker2012,
  title     = {Mit {Bioinformatik} zu {Biomarkern}},
  author    = {Kenar, Erhan and Franken, Holger and Rosenbaum, Lars and Lehmann, Rainer and Forcisi, S and Wörmann, Kilian and Lucio, M and König, Andre and Rahnenführer, Jörg and Schmidt-Kopplin, Philippe and Haering, Hans-Ulrich and Zell, Andreas and Kohlbacher, Oliver},
  year      = {2012},
  date      = {2012-01-01},
  journal   = {Med. Welt},
  volume    = {63},
  number    = {5},
  pages     = {245--250},
  abstract  = {In der medizinischen Forschung beschäftigen sich Metabolomikstudien mit der Untersuchung von komplexen Metabolitenmustern in Körperflüssigkeiten, Geweben und Zellkultur. Sie detektieren parallel große Anzahlen verschiedener Metaboliten in einer Probe und erzeugen Datensätze, die groß genug sind, um statistisch valide Informationen daraus abzuleiten. So können z. B. Unterschiede zwischen gesunden und erkrankten Individuen erkannt, Anzeichen von Erkrankungen erfasst werden bevor klinische Symptome zu erkennen sind, sowie neue diagnostische Marker entdeckt werden. Gleichzeitig sind die entstehenden Datenmengen jedoch zu umfangreich, um manuell interpretiert zu werden. Leistungsfähige Methoden aus der Bioinformatik und Statistik sind daher für eine zuverlässige Auswertung von Metabolomdaten unerlässlich.},
  pubstate  = {published},
  tppubtype = {article}
} In der medizinischen Forschung beschäftigen sich Metabolomikstudien mit der Untersuchung von komplexen Metabolitenmustern in Körperflüssigkeiten, Geweben und Zellkultur. Sie detektieren parallel große Anzahlen verschiedener Metaboliten in einer Probe und erzeugen Datensätze, die groß genug sind, um statistisch valide Informationen daraus abzuleiten. So können z. B. 
Unterschiede zwischen gesunden und erkrankten Individuen erkannt, Anzeichen von Erkrankungen erfasst werden bevor klinische Symptome zu erkennen sind, sowie neue diagnostische Marker entdeckt werden. Gleichzeitig sind die entstehenden Datenmengen jedoch zu umfangreich, um manuell interpretiert zu werden. Leistungsfähige Methoden aus der Bioinformatik und Statistik sind daher für eine zuverlässige Auswertung von Metabolomdaten unerlässlich. |
Nahnsen, Sven; Kohlbacher, Oliver In silico design of targeted SRM-based experiments BMC Bioinformatics, 13 , pp. S8, 2012. @article{MRMDesign2012,
  title     = {In silico design of targeted {SRM}-based experiments},
  author    = {Nahnsen, Sven and Kohlbacher, Oliver},
  url       = {http://www.biomedcentral.com/1471-2105/13/S16/S8},
  year      = {2012},
  date      = {2012-01-01},
  journal   = {BMC Bioinformatics},
  volume    = {13},
  pages     = {S8},
  abstract  = {Selected reaction monitoring (SRM)-based proteomics approaches enable highly sensitive and reproducible assays for profiling of thousands of peptides in one experiment. The development of such assays involves the determination of retention time, detectability and fragmentation properties of peptides, followed by an optimal selection of transitions. If those properties have to be identified experimentally, the assay development becomes a time-consuming task. We introduce a computational framework for the optimal selection of transitions for a given set of proteins based on their sequence information alone or in conjunction with already existing transition databases. The presented method enables the rapid and fully automated initial development of assays for targeted proteomics. We introduce the relevant methods, report and discuss a step-wise and generic protocol and we also show that we can reach an ad hoc coverage of 80 % of the targeted proteins. The presented algorithmic procedure is implemented in the open-source software package OpenMS/TOPP.},
  pubstate  = {published},
  tppubtype = {article}
} Selected reaction monitoring (SRM)-based proteomics approaches enable highly sensitive and reproducible assays for profiling of thousands of peptides in one experiment. The development of such assays involves the determination of retention time, detectability and fragmentation properties of peptides, followed by an optimal selection of transitions. 
If those properties have to be identified experimentally, the assay development becomes a time-consuming task. We introduce a computational framework for the optimal selection of transitions for a given set of proteins based on their sequence information alone or in conjunction with already existing transition databases. The presented method enables the rapid and fully automated initial development of assays for targeted proteomics. We introduce the relevant methods, report and discuss a step-wise and generic protocol and we also show that we can reach an ad hoc coverage of 80 % of the targeted proteins. The presented algorithmic procedure is implemented in the open-source software package OpenMS/TOPP. |
Malisi, Christoph U; Toussaint, Nora C; Kohlbacher, Oliver; Höcker, Birte Binding pocket optimization by computational protein design PLoS One, 7 (12), pp. e52505, 2012. @article{articlereference.2012-11-14.7948826515, title = {Binding pocket optimization by computational protein design}, author = {Christoph U Malisi and Nora C Toussaint and Oliver Kohlbacher and Birte Höcker}, url = {http://www.plosone.org/article/info%3Adoi%2F10.1371%2Fjournal.pone.0052505}, year = {2012}, date = {2012-01-01}, journal = {PLoS One}, volume = {7}, number = {12}, pages = {e52505}, abstract = {Engineering specific interactions between proteins and small molecules is extremely useful for biological studies, as these interactions are essential for molecular recognition. Furthermore, many biotechnological applications are made possible by such an engineering approach, ranging from biosensors to the design of custom enzyme catalysts. Here, we present a novel method for the computational design of protein-small ligand binding named PocketOptimizer. The program can be used to modify protein binding pocket residues to improve or establish binding of a small molecule. It is a modular pipeline based on a number of customizable molecular modeling tools to predict mutations that alter the affinity of a target protein to its ligand. At its heart it uses a receptor-ligand scoring function to estimate the binding free energy between protein and ligand. We compiled a benchmark set that we used to systematically assess the performance of our method. It consists of proteins for which mutational variants with different binding affinities for their ligands and experimentally determined structures exist. Within this test set PocketOptimizer correctly predicts the mutant with the higher affinity in about 69% of the cases. 
A detailed analysis of the results reveals that the strengths of PocketOptimizer lie in the correct introduction of stabilizing hydrogen bonds to the ligand, as well as in the improved geometric complemetarity between ligand and binding pocket. Apart from the novel method for binding pocket design we also introduce a much needed benchmark data set for the comparison of affinities of mutant binding pockets, and that we use to asses programs for in silico design of ligand binding.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Engineering specific interactions between proteins and small molecules is extremely useful for biological studies, as these interactions are essential for molecular recognition. Furthermore, many biotechnological applications are made possible by such an engineering approach, ranging from biosensors to the design of custom enzyme catalysts. Here, we present a novel method for the computational design of protein-small ligand binding named PocketOptimizer. The program can be used to modify protein binding pocket residues to improve or establish binding of a small molecule. It is a modular pipeline based on a number of customizable molecular modeling tools to predict mutations that alter the affinity of a target protein to its ligand. At its heart it uses a receptor-ligand scoring function to estimate the binding free energy between protein and ligand. We compiled a benchmark set that we used to systematically assess the performance of our method. It consists of proteins for which mutational variants with different binding affinities for their ligands and experimentally determined structures exist. Within this test set PocketOptimizer correctly predicts the mutant with the higher affinity in about 69% of the cases. 
A detailed analysis of the results reveals that the strengths of PocketOptimizer lie in the correct introduction of stabilizing hydrogen bonds to the ligand, as well as in the improved geometric complementarity between ligand and binding pocket. Apart from the novel method for binding pocket design we also introduce a much needed benchmark data set for the comparison of affinities of mutant binding pockets, and that we use to assess programs for in silico design of ligand binding. |
Gesing, Sandra; Herres-Pawlis, Sonja; Birkenheuer, Georg; Brinkmann, André; Grunzke, Richard; Kacsuk, Peter; Kohlbacher, Oliver; Kozlovszky, Miklos; Krüger, Jens; Müller-Pfefferkorn, Ralph; Schäfer, Patrick; Steinke, Thomas A Science Gateway Getting Ready for Serving the International Molecular Simulation Community Proceedings of Science, PoS(EGICF12-EMITC2)050 , 2012. @article{articlereference.2012-11-25.5594312996, title = {A Science Gateway Getting Ready for Serving the International Molecular Simulation Community}, author = {Sandra Gesing and Sonja Herres-Pawlis and Georg Birkenheuer and André Brinkmann and Richard Grunzke and Peter Kacsuk and Oliver Kohlbacher and Miklos Kozlovszky and Jens Krüger and Ralph Müller-Pfefferkorn and Patrick Schäfer and Thomas Steinke}, url = {http://pos.sissa.it/archive/conferences/162/050/EGICF12-EMITC2_050.pdf}, year = {2012}, date = {2012-01-01}, journal = {Proceedings of Science}, volume = {PoS(EGICF12-EMITC2)050}, abstract = {The project MoSGrid (Molecular Simulation Grid) has been developing a web-based science gateway supporting the community with various services for quantum chemistry, molecular modeling, and docking. Users gain access to distributed computing infrastructures (DCIs) via intuitive user interfaces for sophisticated tools, specialized workflows, and distributed repositories. Currently, the MoSGrid community consists of about 120 users from a number of fields related to chemistry and bioinformatics located in Germany. However, the underlying security infrastructure is generally applicable and can be deployed in arbitrary projects. 
MoSGrid intends to address the international community by participating in the EU-projects SCI-BUS (Scientific gateway Based User Support) and ER-flow (Building an European Research Community through Interoperable Workflows and Data), and collaborating with the EU-project EDGI (European Desktop Grid Initiative).}, keywords = {}, pubstate = {published}, tppubtype = {article} } The project MoSGrid (Molecular Simulation Grid) has been developing a web-based science gateway supporting the community with various services for quantum chemistry, molecular modeling, and docking. Users gain access to distributed computing infrastructures (DCIs) via intuitive user interfaces for sophisticated tools, specialized workflows, and distributed repositories. Currently, the MoSGrid community consists of about 120 users from a number of fields related to chemistry and bioinformatics located in Germany. However, the underlying security infrastructure is generally applicable and can be deployed in arbitrary projects. MoSGrid intends to address the international community by participating in the EU-projects SCI-BUS (Scientific gateway Based User Support) and ER-flow (Building an European Research Community through Interoperable Workflows and Data), and collaborating with the EU-project EDGI (European Desktop Grid Initiative). |
Birkenheuer, Georg; Blunk, Dirk; Breuers, Sebastian; Brinkmann, Andre; Fels, Gregor; Gesing, Sandra; Grunzke, Richard; Herres-Pawlis, Sonja; Kohlbacher, Oliver; Krüger, Jens; Lang, Ulrich; Packschies, Lars; Müller-Pfefferkorn, Ralf; Schäfer, Patrick; Schuster, Johannes; Steinke, Thomas; Warzecha, Klaus; Wewior, Martin MoSGrid: Progress of Workflow driven Chemical Simulations Proc. of Grid Workflow Workshop 2011, Cologne, Germany, 826 , CEUR Workshop Proceedings, 2012. @conference{conferencereference.2011-03-20.2479075902, title = {MoSGrid: Progress of Workflow driven Chemical Simulations}, author = {Georg Birkenheuer and Dirk Blunk and Sebastian Breuers and Andre Brinkmann and Gregor Fels and Sandra Gesing and Richard Grunzke and Sonja Herres-Pawlis and Oliver Kohlbacher and Jens Krüger and Ulrich Lang and Lars Packschies and Ralf Müller-Pfefferkorn and Patrick Schäfer and Johannes Schuster and Thomas Steinke and Klaus Warzecha and Martin Wewior}, url = {CEUR-WS.org/Vol-826/paper02.pdf}, year = {2012}, date = {2012-01-01}, booktitle = {Proc. of Grid Workflow Workshop 2011, Cologne, Germany}, volume = {826}, publisher = {CEUR Workshop Proceedings}, abstract = {Motivation: Web-based access to computational chemistry grid resources has proven to be a viable approach to simplify the use of simulation codes. The introduction of recipes allows to reuse already developed chemical workflows. By this means, workflows for reocurring basic compute jobs can be provided for daily services. Nevertheless, the same platform has to be open for active workflow development by experienced users. This paper provides an overview of recent developments of the MoSGrid project on providing tools and instruments for building workflow recipes.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } Motivation: Web-based access to computational chemistry grid resources has proven to be a viable approach to simplify the use of simulation codes. 
The introduction of recipes allows reusing already developed chemical workflows. By this means, workflows for recurring basic compute jobs can be provided for daily services. Nevertheless, the same platform has to be open for active workflow development by experienced users. This paper provides an overview of recent developments of the MoSGrid project on providing tools and instruments for building workflow recipes. |
Wörman, Kilian; Lucio, M; Forcisi, S; Heinzmann, S S; Kenar, E; Franken, H; Rosenbaum, L; Schmitt-Kopplin, P; Kohlbacher, O; Zell, A; Häring, H -U; Lehmann, R „Metabolomics“ in der Diabetesforschung Der Diabetologe, (1), pp. 42-48, 2012. @article{MetaboDiabetologe, title = {„Metabolomics“ in der Diabetesforschung}, author = {Kilian Wörman and M Lucio and S Forcisi and S S Heinzmann and E Kenar and H Franken and L Rosenbaum and P Schmitt-Kopplin and O Kohlbacher and A Zell and H -U Häring and R Lehmann}, url = {http://www.springerlink.com/content/x1k613813u555481/}, year = {2012}, date = {2012-01-01}, journal = {Der Diabetologe}, number = {1}, pages = {42-48}, abstract = {Recently a new promising strategy has been introduced to the well-established approaches in diabetes research. Biomedical metabolomic analyses comprise the examination of metabolite patterns in different body fluids, tissues or samples from cell culture experiments with the objective to maximize the simultaneous detection of intermediate and end products of metabolism. Metabolomic analysis in diabetes research could provide new insights in the pathogenetic scenario of prediabetes, diabetes and its late complications as well as the discovery of novel diagnostic biomarkers. This review provides an overview of metabolomic analyses and a summary of current research results in metabolomics in diabetes research.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Recently a new promising strategy has been introduced to the well-established approaches in diabetes research. Biomedical metabolomic analyses comprise the examination of metabolite patterns in different body fluids, tissues or samples from cell culture experiments with the objective to maximize the simultaneous detection of intermediate and end products of metabolism. 
Metabolomic analysis in diabetes research could provide new insights in the pathogenetic scenario of prediabetes, diabetes and its late complications as well as the discovery of novel diagnostic biomarkers. This review provides an overview of metabolomic analyses and a summary of current research results in metabolomics in diabetes research. |
Gesing, Sandra; Herres-Pawlis, Sonja; Birkenheuer, Georg; Brinkmann, André; Grunzke, Richard; Kacsuk, Peter; Kohlbacher, Oliver; Kozlovszky, Miklos; Krüger, Jens; Müller-Pfefferkorn, Ralf; Schäfer, Patrick; Steinke, Thomas The MoSGrid Community – From National to International Scale EGI Community Forum 2012, 2012. @conference{conferencereference.2012-01-26.2488003050, title = {The MoSGrid Community – From National to International Scale}, author = {Sandra Gesing and Sonja Herres-Pawlis and Georg Birkenheuer and André Brinkmann and Richard Grunzke and Peter Kacsuk and Oliver Kohlbacher and Miklos Kozlovszky and Jens Krüger and Ralf Müller-Pfefferkorn and Patrick Schäfer and Thomas Steinke}, year = {2012}, date = {2012-01-01}, booktitle = {EGI Community Forum 2012}, abstract = {The project MoSGrid (Molecular Simulation Grid) offers a web-based science gateway supporting the community with various services for molecular modeling, docking, and quantum chemistry. Users gain access to distributed computing infrastructures (DCIs) via intuitive user interfaces for sophisticated tools, specialized workflows, and distributed repositories. Currently, the MoSGrid community consists of over 50 research groups from a number of fields related to chemistry and bioinformatics located in Germany. However, the underlying security infrastructure is not limited to national requirements. It can be easily configured for international users. 
MoSGrid intends to address the international community by participating in the EU-project SCI-BUS (Scientific gateway Based User Support), collaborating with the EU-projects SHIWA (SHaring Interoperable Workflows for large-scale scientific simulations on Available DCIs), and EDGI (European Desktop Grid Initiative).}, keywords = {}, pubstate = {published}, tppubtype = {conference} } The project MoSGrid (Molecular Simulation Grid) offers a web-based science gateway supporting the community with various services for molecular modeling, docking, and quantum chemistry. Users gain access to distributed computing infrastructures (DCIs) via intuitive user interfaces for sophisticated tools, specialized workflows, and distributed repositories. Currently, the MoSGrid community consists of over 50 research groups from a number of fields related to chemistry and bioinformatics located in Germany. However, the underlying security infrastructure is not limited to national requirements. It can be easily configured for international users. MoSGrid intends to address the international community by participating in the EU-project SCI-BUS (Scientific gateway Based User Support), collaborating with the EU-projects SHIWA (SHaring Interoperable Workflows for large-scale scientific simulations on Available DCIs), and EDGI (European Desktop Grid Initiative). |
Jones, Andrew R; Eisenacher, Martin; Mayer, Gerhard; Kohlbacher, Oliver; Siepen, Jennifer; Hubbard, Simon J; Selley, Julian N; Searle, Brian C; Shofstahl, James; Seymour, Sean L; Julian, Randall; Binz, Pierre-Alain; Deutsch, Eric W; Hermjakob, Henning; Reisinger, Florian; Griss, Johannes; Vizcaino, Juan Antonio; Chambers, Matthew; Pizarro, Angel; Creasy, David The mzIdentML data standard for mass spectrometry-based proteomics results Mol. Cell. Prot., 11 (7), pp. M111.014381, 2012. @article{mzIdentML, title = {The mzIdentML data standard for mass spectrometry-based proteomics results}, author = {Andrew R Jones and Martin Eisenacher and Gerhard Mayer and Oliver Kohlbacher and Jennifer Siepen and Simon J Hubbard and Julian N Selley and Brian C Searle and James Shofstahl and Sean L Seymour and Randall Julian and Pierre-Alain Binz and Eric W Deutsch and Henning Hermjakob and Florian Reisinger and Johannes Griss and Juan Antonio Vizcaino and Matthew Chambers and Angel Pizarro and David Creasy}, url = {http://www.mcponline.org/content/early/2012/02/27/mcp.M111.014381}, year = {2012}, date = {2012-01-01}, journal = {Mol. Cell. Prot.}, volume = {11}, number = {7}, pages = {M111.014381}, abstract = {We report the release of mzIdentML, an exchange standard for peptide and protein identification data, designed by the Proteomics Standards Initiative (PSI). The format was developed by the PSI in collaboration with instrument and software vendors, and the developers of the major open-source projects in proteomics. Software implementations have been developed to enable conversion from most popular proprietary and open-source formats, and mzIdentML will soon be supported by the major public repositories. 
These developments enable proteomics scientists to start working with the standard for exchanging and publishing data sets in support of publications and they provide a stable platform for bioinformatics groups and commercial software vendors to work with a single file format for identification data.}, keywords = {}, pubstate = {published}, tppubtype = {article} } We report the release of mzIdentML, an exchange standard for peptide and protein identification data, designed by the Proteomics Standards Initiative (PSI). The format was developed by the PSI in collaboration with instrument and software vendors, and the developers of the major open-source projects in proteomics. Software implementations have been developed to enable conversion from most popular proprietary and open-source formats, and mzIdentML will soon be supported by the major public repositories. These developments enable proteomics scientists to start working with the standard for exchanging and publishing data sets in support of publications and they provide a stable platform for bioinformatics groups and commercial software vendors to work with a single file format for identification data. |
Trusch, Maria; Tillack, Kati; Kwiatkowski, Marcel; Bertsch, Andreas; Ahrends, Robert; Kohlbacher, Oliver; Martin, Roland; Sospedra, Mirieia; Schlüter, Hartmut Displacement chromatography as first separating step in online two-dimensional liquid chromatography coupled to mass spectrometry analysis of a complex protein sample—The proteome of neutrophils J. Chromatogr. A, 1232 , pp. 288-94, 2012. @article{DCNeutrophils, title = {Displacement chromatography as first separating step in online two-dimensional liquid chromatography coupled to mass spectrometry analysis of a complex protein sample—The proteome of neutrophils}, author = {Maria Trusch and Kati Tillack and Marcel Kwiatkowski and Andreas Bertsch and Robert Ahrends and Oliver Kohlbacher and Roland Martin and Mirieia Sospedra and Hartmut Schlüter}, doi = {https://doi.org/10.1016/j.chroma.2012.02.029}, year = {2012}, date = {2012-01-01}, journal = {J. Chromatogr. A}, volume = {1232}, pages = {288-94}, abstract = {Displacement chromatography provides some advantages over elution chromatography such as the opportunity to enrich trace amounts of molecules and to elute molecules in highest concentrations achievable with liquid chromatography. In a previous study we demonstrated that displacement chromatography is a well-suited alternative to gradient elution in an offline two-dimensional (2D-)LC-MS approach for the analysis of proteomes. In this study we present a method for applying displacement chromatography in an online 2D-LC-MS system including a cation exchange (CEX) column and a reversed phase column. We circumvented the problem of determining the sample capacity of the CEX column by repeated injection (pulses) of sample aliquots monitored by an LC-MS analysis of each flow-through fraction of the CEX column. Elution of tryptic peptides from the CEX column was achieved by repeated injection (pulses) of the displacer spermine. 
Pulsed displacer injections offer the advantage through physical separation of preventing post-column mixing of already separated compounds. As a proof of principle we analyzed the cytosolic proteome of human neutrophils.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Displacement chromatography provides some advantages over elution chromatography such as the opportunity to enrich trace amounts of molecules and to elute molecules in highest concentrations achievable with liquid chromatography. In a previous study we demonstrated that displacement chromatography is a well-suited alternative to gradient elution in an offline two-dimensional (2D-)LC-MS approach for the analysis of proteomes. In this study we present a method for applying displacement chromatography in an online 2D-LC-MS system including a cation exchange (CEX) column and a reversed phase column. We circumvented the problem of determining the sample capacity of the CEX column by repeated injection (pulses) of sample aliquots monitored by an LC-MS analysis of each flow-through fraction of the CEX column. Elution of tryptic peptides from the CEX column was achieved by repeated injection (pulses) of the displacer spermine. Pulsed displacer injections offer the advantage through physical separation of preventing post-column mixing of already separated compounds. As a proof of principle we analyzed the cytosolic proteome of human neutrophils. |
Röglin, Lars; Thiel, Philipp; Kohlbacher, Oliver; Ottmann, Christian Covalent attachment of pyridoxal-phosphate derivatives to 14-3-3 proteins Proc. Natl. Acad. Sci. USA, 109 (18), pp. E1051-3, 2012. @article{PNASLetter1433, title = {Covalent attachment of pyridoxal-phosphate derivatives to 14-3-3 proteins}, author = {Lars Röglin and Philipp Thiel and Oliver Kohlbacher and Christian Ottmann}, doi = {https://dx.doi.org/10.1073%2Fpnas.1116592109}, year = {2012}, date = {2012-01-01}, journal = {Proc. Natl. Acad. Sci. USA}, volume = {109}, number = {18}, pages = {E1051-3}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
Junker, Johannes; Bielow, Chris; Bertsch, Andreas; Sturm, Marc; Reinert, Knut; Kohlbacher, Oliver TOPPAS: A graphical workflow editor for the analysis of high-throughput proteomics data J. Proteome Res., 11 (7), pp. 3914-20, 2012. @article{TOPPAS-JPR, title = {TOPPAS: A graphical workflow editor for the analysis of high-throughput proteomics data}, author = {Johannes Junker and Chris Bielow and Andreas Bertsch and Marc Sturm and Knut Reinert and Oliver Kohlbacher}, url = {https://pubs.acs.org/doi/10.1021/pr300187f}, year = {2012}, date = {2012-01-01}, journal = {J. Proteome Res.}, volume = {11}, number = {7}, pages = {3914-20}, abstract = {Mass spectrometry coupled to high-performance liquid chromatography (HPLC-MS) is evolving more quickly than ever. A wide range of different instrument types and experimental setups are commonly used. Modern instruments acquire huge amounts of data, thus requiring tools for an efficient and automated data analysis. Most existing software for analyzing HPLC-MS data is monolithic and tailored towards a specific application. A more flexible alternative consists in pipeline-based tool kits allowing the construction of custom analysis workflows from small building blocks, e.g., the Trans Proteomics Pipeline (TPP) or The OpenMS Proteomics Pipeline (TOPP). One drawback, however, is the hurdle of setting up complex workflows using command line tools. We present TOPPAS, The OpenMS Proteomics Pipeline ASsistant, a graphical user interface (GUI) for rapid composition of HPLC-MS analysis workflows. Workflow construction reduces to simple drag-and-drop of analysis tools and adding connections in between. Integration of external tools into these workflows is possible as well. Once workflows have been developed, they can be deployed in other workflow management systems or batch processing systems in a fully automated fashion. The implementation is portable and has been tested under Windows, Mac OS X, and Linux. 
TOPPAS is open-source software and available free of charge at http://www.OpenMS.de/TOPPAS.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Mass spectrometry coupled to high-performance liquid chromatography (HPLC-MS) is evolving more quickly than ever. A wide range of different instrument types and experimental setups are commonly used. Modern instruments acquire huge amounts of data, thus requiring tools for an efficient and automated data analysis. Most existing software for analyzing HPLC-MS data is monolithic and tailored towards a specific application. A more flexible alternative consists in pipeline-based tool kits allowing the construction of custom analysis workflows from small building blocks, e.g., the Trans Proteomics Pipeline (TPP) or The OpenMS Proteomics Pipeline (TOPP). One drawback, however, is the hurdle of setting up complex workflows using command line tools. We present TOPPAS, The OpenMS Proteomics Pipeline ASsistant, a graphical user interface (GUI) for rapid composition of HPLC-MS analysis workflows. Workflow construction reduces to simple drag-and-drop of analysis tools and adding connections in between. Integration of external tools into these workflows is possible as well. Once workflows have been developed, they can be deployed in other workflow management systems or batch processing systems in a fully automated fashion. The implementation is portable and has been tested under Windows, Mac OS X, and Linux. TOPPAS is open-source software and available free of charge at http://www.OpenMS.de/TOPPAS. |
Ahrends, Robert; Lichtner, Björn; Buck, Friedrich; Hildebrand, Diana; Kotasinska, Marta; Kohlbacher, Oliver; Kwiatkowski, Marcel; Wagner, Moritz; Trusch, Maria; Schlüter, Hartmut Comparison of displacement versus gradient mode for separation of a complex protein mixture by anion-exchange chromatography J. Chromatogr. B, 901 , pp. 34-40, 2012. @article{AhrendsComparisonJChromB2012, title = {Comparison of displacement versus gradient mode for separation of a complex protein mixture by anion-exchange chromatography}, author = {Robert Ahrends and Björn Lichtner and Friedrich Buck and Diana Hildebrand and Marta Kotasinska and Oliver Kohlbacher and Marcel Kwiatkowski and Moritz Wagner and Maria Trusch and Hartmut Schlüter}, doi = {https://doi.org/10.1016/j.jchromb.2012.05.037}, year = {2012}, date = {2012-01-01}, journal = {J. Chromatogr. B}, volume = {901}, pages = {34-40}, abstract = {Liquid chromatography is often the method of choice for the analysis of proteins in their native state. Nevertheless compared to two-dimensional electrophoresis, the resolution of common chromatographic techniques is low. Liquid chromatography in the displacement mode has previously been shown to offer higher resolution and to elute proteins in the high concentrations. In this study we compared to what extend displacement mode was a suitable alternative to gradient mode for the separation of a complex protein mixture using anion-exchange displacement chromatography and if it is therefore helpful for proteomic investigations. Hence we analyzed the qualitative protein composition of each fraction by tryptic digestion of the proteins, analysis of the tryptic peptides by liquid chromatography coupled to mass spectrometry followed by data base analysis and by measuring the elution profiles of 22 selected proteins with selected reaction monitoring mass spectrometry. 
In the fractions of displacement mode a significantly higher number of identified proteins (51 versus 16) was yielded in comparison to gradient mode. The resolution of displacement chromatography was slightly lower than of gradient chromatography for many but not for all proteins. The selectivities of displacement mode and gradient mode are very different. In conclusion displacement chromatography is a well suited alternative for top-down proteomic approaches which start with separating intact proteins first prior to mass spectrometric analysis of intact or digested proteins. The significant orthogonality of both modes may be used in the future for combining them in multidimensional fractionation procedures.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Liquid chromatography is often the method of choice for the analysis of proteins in their native state. Nevertheless compared to two-dimensional electrophoresis, the resolution of common chromatographic techniques is low. Liquid chromatography in the displacement mode has previously been shown to offer higher resolution and to elute proteins in the high concentrations. In this study we compared to what extend displacement mode was a suitable alternative to gradient mode for the separation of a complex protein mixture using anion-exchange displacement chromatography and if it is therefore helpful for proteomic investigations. Hence we analyzed the qualitative protein composition of each fraction by tryptic digestion of the proteins, analysis of the tryptic peptides by liquid chromatography coupled to mass spectrometry followed by data base analysis and by measuring the elution profiles of 22 selected proteins with selected reaction monitoring mass spectrometry. In the fractions of displacement mode a significantly higher number of identified proteins (51 versus 16) was yielded in comparison to gradient mode. 
The resolution of displacement chromatography was slightly lower than that of gradient chromatography for many but not for all proteins. The selectivities of displacement mode and gradient mode are very different. In conclusion, displacement chromatography is a well-suited alternative for top-down proteomic approaches which start with separating intact proteins first prior to mass spectrometric analysis of intact or digested proteins. The significant orthogonality of both modes may be used in the future for combining them in multidimensional fractionation procedures. |
Herres-Pawlis, Sonja; Birkenheuer, Georg; Brinkmann, André; Gesing, Sandra; Grunzke, Richard; Jäkel, René; Kohlbacher, Oliver; Krüger, Jens; dos Vieira, Ines Santos Workflow-enhanced conformational analysis of guanidine zinc complexes via a science gateway HealthGrid Applications and Technologies Meet Science Gateways for Life Sciences, pp. 142-151, IOS Press, 2012. @inproceedings{inproceedingsreference.2012-06-16.3030642358, title = {Workflow-enhanced conformational analysis of guanidine zinc complexes via a science gateway}, author = {Sonja Herres-Pawlis and Georg Birkenheuer and André Brinkmann and Sandra Gesing and Richard Grunzke and René Jäkel and Oliver Kohlbacher and Jens Krüger and Ines dos Santos Vieira}, url = {http://www.booksonline.iospress.nl/Content/View.aspx?piid=30461}, year = {2012}, date = {2012-01-01}, booktitle = {HealthGrid Applications and Technologies Meet Science Gateways for Life Sciences}, volume = {175}, pages = {142-151}, publisher = {IOS Press}, series = {Studies in Health Technology and Informatics, PubMed ID: 22942005}, abstract = {The new science gateway MoSGrid (Molecular Simulation Grid) enables users to submit and process molecular simulation studies on a large scale. A conformational analysis of guanidine zinc complexes, which are active catalysts in the ring-opening polymerization of lactide, is presented as an example. Such a large-scale quantum chemical study is enabled by workflow technologies. Two times 40 conformers have been generated, for two guanidine zinc complexes. Their structures were optimized using Gaussian 03 and the energies parsed within the quantum chemistry portlet of the MoSGrid portal. All meta- and post-processing steps have been performed in this portlet. 
All workflow features are implemented via WS-PGRADE and submitted to UNICORE.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } The new science gateway MoSGrid (Molecular Simulation Grid) enables users to submit and process molecular simulation studies on a large scale. A conformational analysis of guanidine zinc complexes, which are active catalysts in the ring-opening polymerization of lactide, is presented as an example. Such a large-scale quantum chemical study is enabled by workflow technologies. Two times 40 conformers have been generated, for two guanidine zinc complexes. Their structures were optimized using Gaussian 03 and the energies parsed within the quantum chemistry portlet of the MoSGrid portal. All meta- and post-processing steps have been performed in this portlet. All workflow features are implemented via WS-PGRADE and submitted to UNICORE. |
Birkenheuer, Georg; Blunk, Dirk; Breuers, Sebastian; Brinkmann, André; dos Vieira, Ines Santos; Fels, Gregor; Gesing, Sandra; Grunzke, Richard; Herres-Pawlis, Sonja; Kohlbacher, Oliver; Krüger, Jens; Lang, Ulrich; Packschies, Lars; Müller-Pfefferkorn, Ralph; Schäfer, Patrick; Steinke, Thomas; Warzecha, Klaus; Wewior, Martin MoSGrid: Efficient Data Management and a Standardized Data Exchange Format for Molecular Simulations in a Grid Environment Journal of Cheminformatics, 4 (Suppl 1), pp. P21, 2012. @article{articlereference.2012-06-16.2672740696, title = {MoSGrid: Efficient Data Management and a Standardized Data Exchange Format for Molecular Simulations in a Grid Environment}, author = {Georg Birkenheuer and Dirk Blunk and Sebastian Breuers and André Brinkmann and Ines dos Santos Vieira and Gregor Fels and Sandra Gesing and Richard Grunzke and Sonja Herres-Pawlis and Oliver Kohlbacher and Jens Krüger and Ulrich Lang and Lars Packschies and Ralph Müller-Pfefferkorn and Patrick Schäfer and Thomas Steinke and Klaus Warzecha and Martin Wewior}, url = {http://www.jcheminf.com/content/4/S1/P21}, year = {2012}, date = {2012-01-01}, journal = {Journal of Cheminformatics}, volume = {4}, number = {Suppl 1}, pages = {P21}, abstract = {The MoSGrid (Molecular Simulation Grid) project is currently establishing a platform that aims to be used by both experienced and inexperienced researchers to submit molecular simulation calculations, monitor their progress, and retrieve the results. It provides a web-based portal to easily set up, run, and evaluate molecular simulations carried out on D-Grid resources. The range of applications available encompasses quantum chemistry, molecular dynamics, and protein-ligand docking codes. In addition, data repositories were developed, which contain the results of calculations as well as “recipes” or workflows. These can be used, improved, and distributed by the users. 
A distributed high-throughput file system allows efficient access to large amounts of data in the repositories. For storing both the input and output of the calculations, we have developed MSML (Molecular Simulation Markup Language), a CML derivative (Chemical Markup Language). MSML has been designed to store structural information on small as well as large molecules and results from various molecular simulation tools and docking tools. It ensures interoperability of different tools through a consistent data representation.}, keywords = {}, pubstate = {published}, tppubtype = {article} } The MoSGrid (Molecular Simulation Grid) project is currently establishing a platform that aims to be used by both experienced and inexperienced researchers to submit molecular simulation calculations, monitor their progress, and retrieve the results. It provides a web-based portal to easily set up, run, and evaluate molecular simulations carried out on D-Grid resources. The range of applications available encompasses quantum chemistry, molecular dynamics, and protein-ligand docking codes. In addition, data repositories were developed, which contain the results of calculations as well as “recipes” or workflows. These can be used, improved, and distributed by the users. A distributed high-throughput file system allows efficient access to large amounts of data in the repositories. For storing both the input and output of the calculations, we have developed MSML (Molecular Simulation Markup Language), a CML derivative (Chemical Markup Language). MSML has been designed to store structural information on small as well as large molecules and results from various molecular simulation tools and docking tools. It ensures interoperability of different tools through a consistent data representation. |
Gesing, Sandra; Glatard, Tristan; Krüger, Jens; Olabarriaga, Silvia Delgado; Solomonides, Tony; Silverstein, Jonathan; Montagnat, Johan; Gaignard, Alban; Krefting, Dagmar (Ed.) HealthGrid Applications and Technologies Meet Science Gateways for Life Sciences IOS Press, 175 , 2012. @proceedings{proceedingsreference.2012-06-16.0799943682, title = {HealthGrid Applications and Technologies Meet Science Gateways for Life Sciences}, editor = {Sandra Gesing and Tristan Glatard and Jens Krüger and Silvia Delgado Olabarriaga and Tony Solomonides and Jonathan Silverstein and Johan Montagnat and Alban Gaignard and Dagmar Krefting}, url = {http://www.booksonline.iospress.nl/Content/View.aspx?piid=30461}, year = {2012}, date = {2012-01-01}, volume = {175}, publisher = {IOS Press}, series = {Studies in Health Technology and Informatics}, abstract = {The Tenth HealthGrid Conference and the Fourth International Workshop on Science Gateways for Life Sciences (IWSG-Life) offer a forum to discuss the integration of grid, cloud, and other e-infrastructures into the fields of biology, bioinformatics, biomedicine, and healthcare. The program includes presentations, demos, and tutorials on a wide range of topics from technologies to biomedical research, and from portals to workflow and computational modeling. The principal objective of the HealthGrid conference is the exchange and debate of ideas, technologies, solutions, and requirements that interest the grid and life science communities and are likely to promote the integration of grids into biomedical research and health in the broadest sense. In 2012 the HealthGrid conference celebrates its tenth edition. IWSG-Life is a workshop series that focuses on research contributions for science gateways and tools in the field of life sciences. It brings together scientists from the fields of life sciences, bioinformatics, and computer science. 
It therefore forms an international forum to exchange experience, formulate ideas, and catch up on technological advances in computational biology and chemistry in the context of science gateways. The communities of both events overlap and in 2012 the events have been jointly organized such that attendees can benefit from synergies and will be stimulated to forge further links in their research areas. These proceedings record these events, their topics, and the peer reviewed papers and abstracts. Part I includes the contributions accepted to the HealthGrid conference in the form of oral paper presentations, tutorials, and demonstrations. Part II contains the papers about various aspects of the development and usage of science gateways for life sciences. The joint session is represented by Part III, which addresses the topic of science gateways for biomedical research.}, keywords = {}, pubstate = {published}, tppubtype = {proceedings} } The Tenth HealthGrid Conference and the Fourth International Workshop on Science Gateways for Life Sciences (IWSG-Life) offer a forum to discuss the integration of grid, cloud, and other e-infrastructures into the fields of biology, bioinformatics, biomedicine, and healthcare. The program includes presentations, demos, and tutorials on a wide range of topics from technologies to biomedical research, and from portals to workflow and computational modeling. The principal objective of the HealthGrid conference is the exchange and debate of ideas, technologies, solutions, and requirements that interest the grid and life science communities and are likely to promote the integration of grids into biomedical research and health in the broadest sense. In 2012 the HealthGrid conference celebrates its tenth edition. IWSG-Life is a workshop series that focuses on research contributions for science gateways and tools in the field of life sciences. 
It brings together scientists from the fields of life sciences, bioinformatics, and computer science. It therefore forms an international forum to exchange experience, formulate ideas, and catch up on technological advances in computational biology and chemistry in the context of science gateways. The communities of both events overlap and in 2012 the events have been jointly organized such that attendees can benefit from synergies and will be stimulated to forge further links in their research areas. These proceedings record these events, their topics, and the peer reviewed papers and abstracts. Part I includes the contributions accepted to the HealthGrid conference in the form of oral paper presentations, tutorials, and demonstrations. Part II contains the papers about various aspects of the development and usage of science gateways for life sciences. The joint session is represented by Part III, which addresses the topic of science gateways for biomedical research. |
Schlemmer, Tobias; Grunzke, Richard; Gesing, Sandra; Krüger, Jens; Birkenheuer, Georg; Müller-Pfefferkorn, Ralph; Kohlbacher, Oliver Generic User Management for Science Gateways via Virtual Organizations EGI Technical Forum 2012, 2012. @conference{conferencereference.2012-07-23.7887365592, title = {Generic User Management for Science Gateways via Virtual Organizations}, author = {Tobias Schlemmer and Richard Grunzke and Sandra Gesing and Jens Krüger and Georg Birkenheuer and Ralph Müller-Pfefferkorn and Oliver Kohlbacher}, url = {https://indico.egi.eu/indico/contributionDisplay.py?contribId=125&confId=1019}, year = {2012}, date = {2012-01-01}, booktitle = {EGI Technical Forum 2012}, abstract = {In general, science gateways provide features to access domain-specific applications on distributed computing infrastructures (DCIs). Independent of the technology used for authentication of a user to the science gateway, the membership in a virtual organization (VO) mostly defines which DCIs are available to the user. The MoSGrid project (Molecular Simulation Grid) has developed a workflow-enabled science gateway based on Liferay and WS-PGRADE. It helps users in the complex tasks of configuration and performance of molecular simulations on DCIs. In order to improve the user experience, the login process is being optimized. Currently, a Centralised Authentication Service (CAS) automatically authenticates users according to their X.509 certificate stored in the web browser. To significantly improve the usability, works are underway to authorize users based on their membership in a VO. Thus, users are automatically offered suitable features and DCIs.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } In general, science gateways provide features to access domain-specific applications on distributed computing infrastructures (DCIs). 
Independent of the technology used for authentication of a user to the science gateway, the membership in a virtual organization (VO) mostly defines which DCIs are available to the user. The MoSGrid project (Molecular Simulation Grid) has developed a workflow-enabled science gateway based on Liferay and WS-PGRADE. It helps users in the complex tasks of configuration and performance of molecular simulations on DCIs. In order to improve the user experience, the login process is being optimized. Currently, a Centralised Authentication Service (CAS) automatically authenticates users according to their X.509 certificate stored in the web browser. To significantly improve the usability, works are underway to authorize users based on their membership in a VO. Thus, users are automatically offered suitable features and DCIs. |
Feldhahn, Magdalena; Dönne, Pierre; Schubert, Benjamin; Schilbach, Karin; Rammensee, Hans-Georg; Kohlbacher, Oliver miHA-Match: computational detection of tissue-specific minor histocompatibility antigens J. Immunol. Meth., 386 (1-2), pp. 94-100, 2012. @article{miHA-Match-JIM, title = {miHA-Match: computational detection of tissue-specific minor histocompatibility antigens}, author = {Magdalena Feldhahn and Pierre Dönne and Benjamin Schubert and Karin Schilbach and Hans-Georg Rammensee and Oliver Kohlbacher}, doi = {https://doi.org/10.1016/j.jim.2012.09.004}, year = {2012}, date = {2012-01-01}, journal = {J. Immunol. Meth.}, volume = {386}, number = {1-2}, pages = {94-100}, abstract = {Allogenic stem cell transplantation has shown considerable success in a number of hematological malignancies, in particular in leukemia. The beneficial effect is mediated by donor T cells recognizing patient-specific HLA-binding peptides. These peptides are called minor histocompatibility antigens (miHAs) and are typically caused by single nucleotide polymorphisms. Tissue-specific miHAs have successfully been used in anti-tumor therapy without causing unspecific graft-versus-host reactions. However, only a small number of miHAs have been identified to date, limiting the clinical use. Here we present an immunoinformatics pipeline for the identification of miHAs. The pipeline can be applied to large-scale miHA screening, for example, in the development of diagnostic tests. Another interesting application is the design of personalized miHA-based cancer therapies based on patient-donor pair-specific miHAs detected by this pipeline. The suggested method covers various aspects of genetic variant detection, effects of alternative transcripts, and HLA-peptide binding. 
A comparison of our computational pipeline and experimentally derived datasets shows excellent agreement and coverage of the computationally predicted miHAs.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Allogenic stem cell transplantation has shown considerable success in a number of hematological malignancies, in particular in leukemia. The beneficial effect is mediated by donor T cells recognizing patient-specific HLA-binding peptides. These peptides are called minor histocompatibility antigens (miHAs) and are typically caused by single nucleotide polymorphisms. Tissue-specific miHAs have successfully been used in anti-tumor therapy without causing unspecific graft-versus-host reactions. However, only a small number of miHAs have been identified to date, limiting the clinical use. Here we present an immunoinformatics pipeline for the identification of miHAs. The pipeline can be applied to large-scale miHA screening, for example, in the development of diagnostic tests. Another interesting application is the design of personalized miHA-based cancer therapies based on patient-donor pair-specific miHAs detected by this pipeline. The suggested method covers various aspects of genetic variant detection, effects of alternative transcripts, and HLA-peptide binding. A comparison of our computational pipeline and experimentally derived datasets shows excellent agreement and coverage of the computationally predicted miHAs. |
Gesing, Sandra; Grunzke, Richard; Krüger, Jens; Birkenheuer, Georg; Wewior, Martin; Schäfer, Patrick; Schuller, Bernd; Schuster, Johannes; Herres-Pawlis, Sonja; Breuers, Sebastian; Balasko, Akos; Kozlovszky, Miklos; Fabri, Anna Szikszay; Packschies, Lars; Kacsuk, Peter; Blunk, Dirk; Steinke, Thomas; Brinkmann, André; Fels, Gregor; Müller-Pfefferkorn, Ralph; Jäkel, René; Kohlbacher, Oliver A Single Sign-On Infrastructure for Science Gateways on a Use Case for Structural Bioinformatics Journal of Grid Computing, 10 (4), pp. 769-790, 2012. @article{articlereference.2012-09-11.5985570554, title = {A Single Sign-On Infrastructure for Science Gateways on a Use Case for Structural Bioinformatics}, author = {Sandra Gesing and Richard Grunzke and Jens Krüger and Georg Birkenheuer and Martin Wewior and Patrick Schäfer and Bernd Schuller and Johannes Schuster and Sonja Herres-Pawlis and Sebastian Breuers and Akos Balasko and Miklos Kozlovszky and Anna Szikszay Fabri and Lars Packschies and Peter Kacsuk and Dirk Blunk and Thomas Steinke and André Brinkmann and Gregor Fels and Ralph Müller-Pfefferkorn and René Jäkel and Oliver Kohlbacher}, doi = {http://dx.doi.org/10.1007/s10723-012-9247-y}, year = {2012}, date = {2012-01-01}, journal = {Journal of Grid Computing}, volume = {10}, number = {4}, pages = {769-790}, abstract = {Structural bioinformatics applies computational methods to analyze and model three-dimensional molecular structures. There is a huge number of applications available to work with structural data on large scale. Using these tools on distributed computing infrastructures (DCIs), however, is often complicated due to a lack of suitable interfaces. The MoSGrid (Molecular Simulation Grid) science gateway provides an intuitive user interface to several widely-used applications for structural bioinformatics, molecular modeling, and quantum chemistry. 
It ensures the confidentiality, integrity, and availability of data via a granular security concept, which covers all layers of the infrastructure. The security concept applies SAML (Security Assertion Markup Language) and allows trust delegation from the user interface layer across the high-level middleware layer and the grid middleware layer down to the HPC facilities. SAML assertions had to be integrated into the MoSGrid infrastructure in several places: the workflow-enabled grid portal WS-PGRADE (Web Services Parallel Grid Runtime and Developer Environment), the gUSE (grid User Support Environment) DCI services, and the cloud file system XtreemFS. The presented security infrastructure allows a single sign-on process to all involved DCI components and, therefore, lowers the hurdle for users to utilize large HPC infrastructures for structural bioinformatics.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Structural bioinformatics applies computational methods to analyze and model three-dimensional molecular structures. There is a huge number of applications available to work with structural data on large scale. Using these tools on distributed computing infrastructures (DCIs), however, is often complicated due to a lack of suitable interfaces. The MoSGrid (Molecular Simulation Grid) science gateway provides an intuitive user interface to several widely-used applications for structural bioinformatics, molecular modeling, and quantum chemistry. It ensures the confidentiality, integrity, and availability of data via a granular security concept, which covers all layers of the infrastructure. The security concept applies SAML (Security Assertion Markup Language) and allows trust delegation from the user interface layer across the high-level middleware layer and the grid middleware layer down to the HPC facilities. 
SAML assertions had to be integrated into the MoSGrid infrastructure in several places: the workflow-enabled grid portal WS-PGRADE (Web Services Parallel Grid Runtime and Developer Environment), the gUSE (grid User Support Environment) DCI services, and the cloud file system XtreemFS. The presented security infrastructure allows a single sign-on process to all involved DCI components and, therefore, lowers the hurdle for users to utilize large HPC infrastructures for structural bioinformatics. |
Böcker, Sebastian; Briesemeister, Sebastian; Klau, Gunnar W Exact Algorithms for Cluster Editing: Evaluation and Experiments Algorithmica, 60 (2), pp. 316-334, 2011, (A preliminary version of this paper appeared under the title 'Exact algorithms for cluster editing: Evaluation and experiments' in the Proceedings of the 7th Workshop on Experimental Algorithms, WEA 2008, in: LNCS, vol. 5038, Springer, pp. 289-302). @article{ArticleReference.2009-03-25.8799675334, title = {Exact Algorithms for Cluster Editing: Evaluation and Experiments}, author = {Sebastian Böcker and Sebastian Briesemeister and Gunnar W Klau}, url = {http://dx.doi.org/10.1007/s00453-009-9339-7}, year = {2011}, date = {2011-01-01}, journal = {Algorithmica}, volume = {60}, number = {2}, pages = {316-334}, abstract = {The Cluster Editing problem is defined as follows: Given an undirected, loopless graph, we want to find a set of edge modifications (insertions and deletions) of minimum cardinality, such that the modified graph consists of disjoint cliques. We present empirical results for this problem using exact methods from fixed-parameter algorithmics and linear programming. We investigate parameter-independent data reduction methods and find that effective preprocessing is possible if the number of edge modifications k is smaller than some multiple of |V|, where V is the vertex set of the input graph. In particular, combining parameter-dependent data reduction with lower and upper bounds we can effectively reduce graphs satisfying k <= 25|V|. In addition to the fastest known fixed-parameter branching strategy for the problem, we investigate an integer linear program (ILP) formulation of the problem using a cutting plane approach. Our results indicate that both approaches are capable of solving large graphs with 1000 vertices and several thousand edge modifications. 
For the first time, complex and very large graphs such as biological instances allow for an exact solution, using a combination of the above techniques.}, note = {A preliminary version of this paper appeared under the title 'Exact algorithms for cluster editing: Evaluation and experiments' in the Proceedings of the 7th Workshop on Experimental Algorithms, WEA 2008, in: LNCS, vol. 5038, Springer, pp. 289-302}, keywords = {}, pubstate = {published}, tppubtype = {article} } The Cluster Editing problem is defined as follows: Given an undirected, loopless graph, we want to find a set of edge modifications (insertions and deletions) of minimum cardinality, such that the modified graph consists of disjoint cliques. We present empirical results for this problem using exact methods from fixed-parameter algorithmics and linear programming. We investigate parameter-independent data reduction methods and find that effective preprocessing is possible if the number of edge modifications k is smaller than some multiple of |V|, where V is the vertex set of the input graph. In particular, combining parameter-dependent data reduction with lower and upper bounds we can effectively reduce graphs satisfying k <= 25|V|. In addition to the fastest known fixed-parameter branching strategy for the problem, we investigate an integer linear program (ILP) formulation of the problem using a cutting plane approach. Our results indicate that both approaches are capable of solving large graphs with 1000 vertices and several thousand edge modifications. For the first time, complex and very large graphs such as biological instances allow for an exact solution, using a combination of the above techniques. |
Martens, Lennart; Chambers, Matthew; Sturm, Marc; Kessner, Darren; Levander, Fredrik; Shofstahl, Jim; Tang, Wilfried H; Römpp, Andreas; Neumann, Steffen; Pizarro, Angel D; Montecchi-Palazzi, Luisa; Tasman, Natalie; Coleman, Mike; Reisinger, Florian; Souda, Puneet; Hermjakob, Henning; Binz, Pierre-Alain; Deutsch, Eric W mzML – a Community Standard for Mass Spectrometry Data Mol. Cell. Prot., 10 (1), pp. R110.000133, 2011. @article{mzML-MCP, title = {mzML – a Community Standard for Mass Spectrometry Data}, author = {Lennart Martens and Matthew Chambers and Marc Sturm and Darren Kessner and Fredrik Levander and Jim Shofstahl and Wilfried H Tang and Andreas Römpp and Steffen Neumann and Angel D Pizarro and Luisa Montecchi-Palazzi and Natalie Tasman and Mike Coleman and Florian Reisinger and Puneet Souda and Henning Hermjakob and Pierre-Alain Binz and Eric W Deutsch}, url = {http://dx.doi.org/10.1074/mcp.R110.000133}, year = {2011}, date = {2011-01-01}, journal = {Mol. Cell. Prot.}, volume = {10}, number = {1}, pages = {R110.000133}, abstract = {Mass spectrometry is a fundamental tool for discovery and analysis in the life sciences. With the rapid advances in mass spectrometry technology and methods, it has become imperative to provide a standard output format for mass spectrometry data that will facilitate data sharing and analysis. Initially, the efforts to develop a standard format for mass spectrometry data resulted in multiple formats, each designed with a different underlying philosophy. To resolve the issues associated with having multiple formats, vendors, researchers, and software developers convened under the banner of the HUPO PSI to develop a single standard. 
The new data format incorporated many of the desirable technical attributes from the previous data formats, while adding a number of improvements, including features such as a controlled vocabulary with validation tools to ensure consistent usage of the format, improved support for selected reaction monitoring data, and immediately available implementations to facilitate rapid adoption by the community. The resulting standard data format, mzML, is a well tested open-source format for mass spectrometer output files that can be readily utilized by the community and easily adapted for incremental advances in mass spectrometry technology.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Mass spectrometry is a fundamental tool for discovery and analysis in the life sciences. With the rapid advances in mass spectrometry technology and methods, it has become imperative to provide a standard output format for mass spectrometry data that will facilitate data sharing and analysis. Initially, the efforts to develop a standard format for mass spectrometry data resulted in multiple formats, each designed with a different underlying philosophy. To resolve the issues associated with having multiple formats, vendors, researchers, and software developers convened under the banner of the HUPO PSI to develop a single standard. The new data format incorporated many of the desirable technical attributes from the previous data formats, while adding a number of improvements, including features such as a controlled vocabulary with validation tools to ensure consistent usage of the format, improved support for selected reaction monitoring data, and immediately available implementations to facilitate rapid adoption by the community. The resulting standard data format, mzML, is a well tested open-source format for mass spectrometer output files that can be readily utilized by the community and easily adapted for incremental advances in mass spectrometry technology. |
Gesing, Sandra; van Hemert, Jano; Kacsuk, Peter; Kohlbacher, Oliver Special Issue: Portals for life sciences—Providing intuitive access to bioinformatic tools Concurrency and Computation: Practice and Experience, 23 (3), pp. 223–234, 2011. @article{IWPLS10-Editorial, title = {Special Issue: Portals for life sciences—Providing intuitive access to bioinformatic tools}, author = {Sandra Gesing and Jano van Hemert and Peter Kacsuk and Oliver Kohlbacher}, url = {http://onlinelibrary.wiley.com/doi/10.1002/cpe.1687/full}, year = {2011}, date = {2011-01-01}, journal = {Concurrency and Computation: Practice and Experience}, volume = {23}, number = {3}, pages = {223–234}, abstract = {We set the scene for the special issue on scientific portals in the context of Life Sciences. This paper introduces most of the key aspects that were raised during the 2-day workshop held at the Edinburgh e-Science Institute in September 2009. It also provides a balanced overview of tools and technologies out there that are mature and ready for use. The further papers in the special issue will explore several of the concepts in more detail and provide compelling use cases where portals were taken full advantage of.}, keywords = {}, pubstate = {published}, tppubtype = {article} } We set the scene for the special issue on scientific portals in the context of Life Sciences. This paper introduces most of the key aspects that were raised during the 2-day workshop held at the Edinburgh e-Science Institute in September 2009. It also provides a balanced overview of tools and technologies out there that are mature and ready for use. The further papers in the special issue will explore several of the concepts in more detail and provide compelling use cases where portals were taken full advantage of. |
Bertsch, Andreas; Gröpl, Clemens; Reinert, Knut; Kohlbacher, Oliver OpenMS and TOPP: Open Source Software for LC-MS Data Analysis Methods Mol. Biol., 696 , pp. 353-67, 2011. @article{MMB-TOPP-2011, title = {OpenMS and TOPP: Open Source Software for LC-MS Data Analysis}, author = {Andreas Bertsch and Clemens Gröpl and Knut Reinert and Oliver Kohlbacher}, doi = {https://doi.org/10.1007/978-1-60761-987-1_23}, year = {2011}, date = {2011-01-01}, journal = {Methods Mol. Biol.}, volume = {696}, pages = {353-67}, abstract = {Proteomics experiments based on state-of-the-art mass spectrometry produce vast amounts of data, which cannot be analyzed manually. Hence, software is needed which is able to analyze the data in an automated fashion. The need for robust and reusable software tools triggered the development of libraries implementing different algorithms for the various analysis steps. OpenMS is such a software library and provides a wealth of data structures and algorithms for the analysis of mass spectrometric data. For users unfamiliar with programming, TOPP ("The OpenMS Proteomics Pipeline") offers a wide range of already implemented tools sharing the same interface and designed for a specific analysis task each. TOPP thus makes the sophisticated algorithms of OpenMS accessible to nonprogrammers. The individual TOPP tools can be strung together into pipelines for analyzing mass spectrometry-based experiments starting from the raw output of the mass spectrometer. These analysis pipelines can be constructed using a graphical editor. Even complex analytical workflows can thus be analyzed with ease.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Proteomics experiments based on state-of-the-art mass spectrometry produce vast amounts of data, which cannot be analyzed manually. Hence, software is needed which is able to analyze the data in an automated fashion. 
The need for robust and reusable software tools triggered the development of libraries implementing different algorithms for the various analysis steps. OpenMS is such a software library and provides a wealth of data structures and algorithms for the analysis of mass spectrometric data. For users unfamiliar with programming, TOPP ("The OpenMS Proteomics Pipeline") offers a wide range of already implemented tools sharing the same interface and designed for a specific analysis task each. TOPP thus makes the sophisticated algorithms of OpenMS accessible to nonprogrammers. The individual TOPP tools can be strung together into pipelines for analyzing mass spectrometry-based experiments starting from the raw output of the mass spectrometer. These analysis pipelines can be constructed using a graphical editor. Even complex analytical workflows can thus be analyzed with ease. |
Bielow, Chris; Gröpl, Clemens; Kohlbacher, Oliver; Reinert, Knut Bioinformatics for Qualitative and Quantitative Proteomics Mayer, Bernd (Ed.): Bioinformatics for Omics Data: Methods and Protocols, Chapter 15, pp. 1-19, Springer, 2011. @inbook{BookChapter-BIoinformaticsForOmics, title = {Bioinformatics for Qualitative and Quantitative Proteomics}, author = {Chris Bielow and Clemens Gröpl and Oliver Kohlbacher and Knut Reinert}, editor = {Bernd Mayer}, url = {http://www.springer.com/life+sciences/bioinformatics/book/978-1-61779-026-3}, year = {2011}, date = {2011-01-01}, booktitle = {Bioinformatics for Omics Data: Methods and Protocols}, pages = {1-19}, publisher = {Springer}, chapter = {15}, series = {Methods in Molecular Biology, vol.719}, abstract = {Mass spectrometry is today a key analytical technique to elucidate the amount and content of proteins expressed in a certain cellular context. The degree of automation in proteomics has yet to reach that of genomic techniques, but even current technologies make a manual inspection of the data infeasible. This article addresses the key algorithmic problems bioinformaticians face when handling modern proteomic samples and shows common solutions to them. We provide examples on how algorithms can be combined to build relatively complex analysis pipelines, point out certain pitfalls and aspects worth considering and give a list of current state-of-the-art tools.}, keywords = {}, pubstate = {published}, tppubtype = {inbook} } Mass spectrometry is today a key analytical technique to elucidate the amount and content of proteins expressed in a certain cellular context. The degree of automation in proteomics has yet to reach that of genomic techniques, but even current technologies make a manual inspection of the data infeasible. This article addresses the key algorithmic problems bioinformaticians face when handling modern proteomic samples and shows common solutions to them. 
We provide examples on how algorithms can be combined to build relatively complex analysis pipelines, point out certain pitfalls and aspects worth considering and give a list of current state-of-the-art tools. |
Canzar, Stefan; Toussaint, Nora C; Klau, Gunnar W An exact algorithm for side-chain placement in protein design Optimization Letters, pp. 1-14, 2011. @article{canzar_toussaint_klau_ol, title = {An exact algorithm for side-chain placement in protein design}, author = {Stefan Canzar and Nora C Toussaint and Gunnar W Klau}, url = {http://dx.doi.org/10.1007/s11590-011-0308-0}, year = {2011}, date = {2011-01-01}, journal = {Optimization Letters}, pages = {1-14}, abstract = {Computational protein design aims at constructing novel or improved functions on the structure of a given protein backbone and has important applications in the pharmaceutical and biotechnical industry. The underlying combinatorial side-chain placement problem consists of choosing a side-chain placement for each residue position such that the resulting overall energy is minimum. The choice of the side-chain then also determines the amino acid for this position. Many algorithms for this NP-hard problem have been proposed in the context of homology modeling, which, however, reach their limits when faced with large protein design instances. In this paper, we propose a new exact method for the side-chain placement problem that works well even for large instance sizes as they appear in protein design. Our main contribution is a dedicated branch-and-bound algorithm that combines tight upper and lower bounds resulting from a novel Lagrangian relaxation approach for side-chain placement. Our experimental results show that our method outperforms alternative state-of-the-art exact approaches and makes it possible to solve large protein design instances.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Computational protein design aims at constructing novel or improved functions on the structure of a given protein backbone and has important applications in the pharmaceutical and biotechnical industry. 
The underlying combinatorial side-chain placement problem consists of choosing a side-chain placement for each residue position such that the resulting over- all energy is minimum. The choice of the side-chain then also determines the amino acid for this position. Many algorithms for this NP-hard problem have been proposed in the context of homology modeling, which, however, reach their limits when faced with large protein design instances. In this paper, we propose a new exact method for the side-chain placement problem that works well even for large instance sizes as they appear in protein design. Our main contribution is a dedicated branch-and-bound algorithm that combines tight upper and lower bounds resulting from a novel Lagrangian relaxation approach for side-chain placement. Our experimental results show that our method outperforms alternative state-of-the-art exact approaches and makes it possible to solve large protein design instances. |
Gesing, Sandra; Kacsuk, Peter; Kozlovszky, Miklos; Birkenheuer, Georg; Blunk, Dirk; Breuers, Sebastian; Brinkmann, Andre; Fels, Gregor; Grunzke, Richard; Herres-Pawlis, Sonja; Krüger, Jens; Packschies, Lars; Müller-Pfefferkorn, Ralf; Schäfer, Patrick; Steinke, Thomas; Fabri, Anna Szikszay; Warzecha, Klaus; Wewior, Martin; Kohlbacher, Oliver A Science Gateway for Molecular Simulations EGI (European Grid Infrastructure) User Forum 2011, Book of Abstracts, pp. 94–95, ISBN 978 90 816927 1 7, 2011. @inproceedings{inproceedingsreference.2011-03-20.0365321064, title = {A Science Gateway for Molecular Simulations}, author = {Sandra Gesing and Peter Kacsuk and Miklos Kozlovszky and Georg Birkenheuer and Dirk Blunk and Sebastian Breuers and Andre Brinkmann and Gregor Fels and Richard Grunzke and Sonja Herres-Pawlis and Jens Krüger and Lars Packschies and Ralf Müller-Pfefferkorn and Patrick Schäfer and Thomas Steinke and Anna Szikszay Fabri and Klaus Warzecha and Martin Wewior and Oliver Kohlbacher}, year = {2011}, date = {2011-01-01}, booktitle = {EGI (European Grid Infrastructure) User Forum 2011, Book of Abstracts}, pages = {94–95}, publisher = {ISBN 978 90 816927 1 7}, abstract = {Nowadays, scientists in multiple research areas like material science, structural biology, and drug design are supported by invaluable molecular simulation tools. These tools allow analyzing increasingly complex chemical structures on high-performance computing facilities. However, the lack of graphical user interfaces, the limitation of the usability of the tools and the complexity of infrastructures demands intuitive user interfaces. The project MoSGrid (Molecular Simulation Grid) addresses these issues by combining an easy to use portal-based infrastructure with expert knowledge on the correct use of the complex methods. 
The emerging portal will support the user in all stages of the simulation process with easy access to data repositories storing information about molecular properties and the possibility of creating, editing and invoking workflows. The project integrates the UNICORE 6 grid middleware and the cloud file system XtreemFS into the workflow-enabled grid portal WS-PGRADE.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Nowadays, scientists in multiple research areas like material science, structural biology, and drug design are supported by invaluable molecular simulation tools. These tools allow analyzing increasingly complex chemical structures on high-performance computing facilities. However, the lack of graphical user interfaces, the limitation of the usability of the tools and the complexity of infrastructures demands intuitive user interfaces. The project MoSGrid (Molecular Simulation Grid) addresses these issues by combining an easy to use portal-based infrastructure with expert knowledge on the correct use of the complex methods. The emerging portal will support the user in all stages of the simulation process with easy access to data repositories storing information about molecular properties and the possibility of creating, editing and invoking workflows. The project integrates the UNICORE 6 grid middleware and the cloud file system XtreemFS into the workflow-enabled grid portal WS-PGRADE. |
Feldhahn, Magdalena; Menzel, Moritz; Weide, Benjamin; Bauer, Peter; Meckbach, Diana; Garbe, Claus; Kohlbacher, Oliver; Bauer, Jürgen No evidence of viral genomes in whole-transcriptome sequencings of three melanoma metastases Exp. Dermatol., 20 (9), pp. 766-768, 2011. @article{ExpDermatologyMelanoma, title = {No evidence of viral genomes in whole-transcriptome sequencings of three melanoma metastases}, author = {Magdalena Feldhahn and Moritz Menzel and Benjamin Weide and Peter Bauer and Diana Meckbach and Claus Garbe and Oliver Kohlbacher and Jürgen Bauer}, doi = {http://dx.doi.org/10.1111/j.1600-0625.2011.01312.x}, year = {2011}, date = {2011-01-01}, journal = {Exp. Dermatol.}, volume = {20}, number = {9}, pages = {766-768}, abstract = {Several viruses are known to cause cancer, such as human herpes virus 8 in Kaposi sarcoma and human papilloma viruses in cervical cancer. Recently, Merkel cell polyoma virus (MCPyV) has been described in 80% of Merkel cell carcinomas (MCC). Similarly to MCC and Kaposi sarcoma, melanoma incidence is increased in immunosuppressed patients. We asked whether infection by known or yet unknown viruses may play a role in melanoma development as well. To detect viral sequences expressed in melanoma cells, we analysed three melanoma metastases by whole-transcriptome sequencing and digital transcriptome subtraction. None of the samples investigated harboured viral sequences. In contrast, artificial viral sequences and MCPyV transcripts used as a positive control for the bioinformatics analysis were detected. This renders it less likely that viruses are frequently involved in melanoma induction. A larger number of melanoma transcriptome sequencings are required to rule out viruses as a relevant pathogen.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Several viruses are known to cause cancer, such as human herpes virus 8 in Kaposi sarcoma and human papilloma viruses in cervical cancer. 
Recently, Merkel cell polyoma virus (MCPyV) has been described in 80% of Merkel cell carcinomas (MCC). Similarly to MCC and Kaposi sarcoma, melanoma incidence is increased in immunosuppressed patients. We asked whether infection by known or yet unknown viruses may play a role in melanoma development as well. To detect viral sequences expressed in melanoma cells, we analysed three melanoma metastases by whole-transcriptome sequencing and digital transcriptome subtraction. None of the samples investigated harboured viral sequences. In contrast, artificial viral sequences and MCPyV transcripts used as a positive control for the bioinformatics analysis were detected. This renders it less likely that viruses are frequently involved in melanoma induction. A larger number of melanoma transcriptome sequencings are required to rule out viruses as a relevant pathogen. |
Röttig, Marc; Medema, Marnix; Blin, Kai; Weber, Tilmann; Rausch, Christian; Kohlbacher, Oliver NRPSpredictor2 - a webserver for predicting NRPS adenylation domain specificity Nucl. Acids Res., 39 (webserver issue), pp. W362-W367, 2011. @article{NRPSP2-NAR, title = {NRPSpredictor2 - a webserver for predicting NRPS adenylation domain specificity}, author = {Marc Röttig and Marnix Medema and Kai Blin and Tilmann Weber and Christian Rausch and Oliver Kohlbacher}, doi = {https://doi.org/10.1093/nar/gkr323}, year = {2011}, date = {2011-01-01}, journal = {Nucl. Acids Res.}, volume = {39}, number = {webserver issue}, pages = {W362-W367}, abstract = {The products of many bacterial non-ribosomal peptide synthetases (NRPS) are highly important secondary metabolites, including vancomycin and other antibiotics. The ability to predict substrate specificity of newly detected NRPS Adenylation (A-) domains by genome sequencing efforts is of great importance to identify and annotate new gene clusters that produce secondary metabolites. Prediction of A-domain specificity based on the sequence alone can be achieved through sequence signatures or, more accurately, through machine learning methods. We present an improved predictor, based on previous work (NRPSpredictor), that predicts A-domain specificity using Support Vector Machines on four hierarchical levels, ranging from gross physicochemical properties of an A-domain’s substrates down to single amino acid substrates. The three more general levels are predicted with an F-measure better than 0.89 and the most detailed level with an average F-measure of 0.80. We also modeled the applicability domain of our predictor to estimate for new A-domains whether they lie in the applicability domain. 
Finally, since there are also NRPS that play an important role in natural products chemistry of fungi, such as peptaibols and cephalosporins, we added a predictor for fungal A-domains, which predicts gross physicochemical properties with an F-measure of 0.84. The service is available at http://nrps.informatik.uni-tuebingen.de/.}, keywords = {}, pubstate = {published}, tppubtype = {article} } The products of many bacterial non-ribosomal peptide synthetases (NRPS) are highly important secondary metabolites, including vancomycin and other antibiotics. The ability to predict substrate specificity of newly detected NRPS Adenylation (A-) domains by genome sequencing efforts is of great importance to identify and annotate new gene clusters that produce secondary metabolites. Prediction of A-domain specificity based on the sequence alone can be achieved through sequence signatures or, more accurately, through machine learning methods. We present an improved predictor, based on previous work (NRPSpredictor), that predicts A-domain specificity using Support Vector Machines on four hierarchical levels, ranging from gross physicochemical properties of an A-domain’s substrates down to single amino acid substrates. The three more general levels are predicted with an F-measure better than 0.89 and the most detailed level with an average F-measure of 0.80. We also modeled the applicability domain of our predictor to estimate for new A-domains whether they lie in the applicability domain. Finally, since there are also NRPS that play an important role in natural products chemistry of fungi, such as peptaibols and cephalosporins, we added a predictor for fungal A-domains, which predicts gross physicochemical properties with an F-measure of 0.84. The service is available at http://nrps.informatik.uni-tuebingen.de/. |
Gesing, Sandra; Grunzke, Richard; Balasko, Akos; Birkenheuer, Georg; Blunk, Dirk; Breuers, Sebastian; Brinkmann, André; Fels, Gregor; Herres-Pawlis, Sonja; Kacsuk, Peter; Kozlovszky, Miklos; Krüger, Jens; Packschies, Lars; Schäfer, Patrick; Schuller, Bernd; Schuster, Johannes; Steinke, Thomas; Fabri, Anna Szikszay; Wewior, Martin; Müller-Pfefferkorn, Ralph; Kohlbacher, Oliver Granular Security for a Science Gateway in Structural Bioinformatics Proc. IWSG-Life 2011, 2011. (BibTeX) @inproceedings{GranularSec_IWSG11, title = {Granular Security for a Science Gateway in Structural Bioinformatics}, author = {Sandra Gesing and Richard Grunzke and Akos Balasko and Georg Birkenheuer and Dirk Blunk and Sebastian Breuers and André Brinkmann and Gregor Fels and Sonja Herres-Pawlis and Peter Kacsuk and Miklos Kozlovszky and Jens Krüger and Lars Packschies and Patrick Schäfer and Bernd Schuller and Johannes Schuster and Thomas Steinke and Anna Szikszay Fabri and Martin Wewior and Ralph Müller-Pfefferkorn and Oliver Kohlbacher}, year = {2011}, date = {2011-01-01}, booktitle = {Proc. IWSG-Life 2011}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
Toussaint, Nora C; Feldhahn, Magdalena; Ziehm, Matthias; Stevanovic, Stefan; Kohlbacher, Oliver T-Cell Epitope Prediction Based on Self-Tolerance Proc. ICIW 2011, 2011. @conference{ICIW11, title = {T-Cell Epitope Prediction Based on Self-Tolerance}, author = {Nora C Toussaint and Magdalena Feldhahn and Matthias Ziehm and Stefan Stevanovic and Oliver Kohlbacher}, year = {2011}, date = {2011-01-01}, booktitle = {Proc. ICIW 2011}, abstract = {T-cell epitopes, i.e., peptides capable of inducing a T-cell mediated immune response, represent suitable components for vaccines against infectious diseases and cancer. The development of accurate T-cell epitope prediction methods is thus of great interest to immunologists and the pharmaceutical industry. Whether a particular peptide is a T-cell epitope depends on the availability of (a) an MHC molecule capable of presenting the peptide on the cell surface and (b) a suitable T cell. In order to ensure self-tolerance of the immune system, T cells reactive to self-peptides are eliminated via negative selection processes. The composition of the T-cell repertoire thus depends on the host proteome. These complex dependencies along with a lack of data render T-cell epitope prediction a rather challenging problem. It is commonly reduced to the simpler MHC binding prediction problem. While state-of-the-art MHC binding prediction methods are highly accurate, the actual prediction of T-cell epitopes leaves room for improvement. Previously proposed approaches to T-cell epitope prediction do not take the dependencies on the host proteome into account but utilize peptide sequence information only. 
Their low prediction accuracies can be attributed to this limited view on T-cell reactivity and to a biased data basis.}, keywords = {}, pubstate = {published}, tppubtype = {conference} } T-cell epitopes, i.e., peptides capable of inducing a T-cell mediated immune response, represent suitable components for vaccines against infectious diseases and cancer. The development of accurate T-cell epitope prediction methods is thus of great interest to immunologists and the pharmaceutical industry. Whether a particular peptide is a T-cell epitope depends on the availability of (a) an MHC molecule capable of presenting the peptide on the cell surface and (b) a suitable T cell. In order to ensure self-tolerance of the immune system, T cells reactive to self-peptides are eliminated via negative selection processes. The composition of the T-cell repertoire thus depends on the host proteome. These complex dependencies along with a lack of data render T-cell epitope prediction a rather challenging problem. It is commonly reduced to the simpler MHC binding prediction problem. While state-of-the-art MHC binding prediction methods are highly accurate, the actual prediction of T-cell epitopes leaves room for improvement. Previously proposed approaches to T-cell epitope prediction do not take the dependencies on the host proteome into account but utilize peptide sequence information only. Their low prediction accuracies can be attributed to this limited view on T-cell reactivity and to a biased data basis. |
Nahnsen, Sven; Bertsch, Andreas; Rahnenführer, Jörg; Nordheim, Alfred; Kohlbacher, Oliver Probabilistic Consensus Scoring Improves Tandem Mass Spectrometry Peptide Identification J. Proteome Res., 10 (8), pp. 3332-3343, 2011. @article{ConID_JPR, title = {Probabilistic Consensus Scoring Improves Tandem Mass Spectrometry Peptide Identification}, author = {Sven Nahnsen and Andreas Bertsch and Jörg Rahnenführer and Alfred Nordheim and Oliver Kohlbacher}, url = {http://pubs.acs.org/doi/abs/10.1021/pr2002879}, year = {2011}, date = {2011-01-01}, journal = {J. Proteome Res.}, volume = {10}, number = {8}, pages = {3332--3343}, abstract = {Database search is a standard technique for identifying peptides from their tandem mass spectra. In order to increase the number of correctly identified peptides, we suggest a probabilistic framework that allows the combination of scores from different search engines into a joint consensus score. Central to the approach is a novel method to estimate scores for peptides not found by an individual search engine. This approach allows the estimation of p-values for each candidate peptide and their combination across all search engines. The consensus approach works better than any single search engine across all different instrument types considered in this study. Improvements vary strongly from platform to platform and from search engine to search engine. Compared to the industry standard MASCOT, our approach can identify up to 60% more peptides. The software for consensus predictions is implemented in C++ as part of OpenMS, a software framework for mass spectrometry. The source code is available in the current development version of OpenMS and can easily be used as a command line application or via a graphical pipeline designer TOPPAS.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Database search is a standard technique for identifying peptides from their tandem mass spectra. 
In order to increase the number of correctly identified peptides, we suggest a probabilistic framework that allows the combination of scores from different search engines into a joint consensus score. Central to the approach is a novel method to estimate scores for peptides not found by an individual search engine. This approach allows the estimation of p-values for each candidate peptide and their combination across all search engines. The consensus approach works better than any single search engine across all different instrument types considered in this study. Improvements vary strongly from platform to platform and from search engine to search engine. Compared to the industry standard MASCOT, our approach can identify up to 60% more peptides. The software for consensus predictions is implemented in C++ as part of OpenMS, a software framework for mass spectrometry. The source code is available in the current development version of OpenMS and can easily be used as a command line application or via a graphical pipeline designer TOPPAS. |
Birkenheuer, Georg; Blunk, Dirk; Breuers, Sebastian; Brinkmann, Andre; dos Vieira, Ines Santos; Fels, Gregor; Gesing, Sandra; Grunzke, Richard; Herres-Pawlis, Sonja; Kohlbacher, Oliver; Krüger, Jens; Lang, Ulrich; Packschies, Lars; Müller-Pfefferkorn, Ralf; Schäfer, Patrick; Schmalz, Hans-Günther; Steinke, Thomas; Warzecha, Klaus; Wewior, Martin A Molecular Simulation Grid as new tool for Computational Chemistry, Biology and Material Science Journal of Cheminformatics 2011, 3 (Suppl 1), P14 2011, (DOI:10.1186/1758-2946-3-S1-P14). @conference{conferencereference.2011-06-12.4129948178, title = {A Molecular Simulation Grid as new tool for Computational Chemistry, Biology and Material Science}, author = {Georg Birkenheuer and Dirk Blunk and Sebastian Breuers and Andre Brinkmann and Ines dos Santos Vieira and Gregor Fels and Sandra Gesing and Richard Grunzke and Sonja Herres-Pawlis and Oliver Kohlbacher and Jens Krüger and Ulrich Lang and Lars Packschies and Ralf Müller-Pfefferkorn and Patrick Schäfer and Hans-Günther Schmalz and Thomas Steinke and Klaus Warzecha and Martin Wewior}, url = {http://www.jcheminf.com/content/3/S1/P14}, year = {2011}, date = {2011-01-01}, booktitle = {Journal of Cheminformatics 2011}, volume = {3}, number = {Suppl 1}, series = {P14}, abstract = {The MoSGrid (Molecular Simulation Grid) project aims to provide remote computational chemistry services within the German Grid Initiative (D-Grid). Submission and monitoring of compute jobs, as well as the retrieval of postprocessed results are realized through a web based portal. The use of standardized portlets and a generally modular approach allows for the simultaneous and independent implementation of frontends for different molecular simulation codes. To date, functional prototypes of portlets for applications from the quantum chemical and the molecular dynamics domain are available, being represented by Gaussian and Gromacs, respectively. 
The implementation of other quantum chemical codes, as requested by the community, and of codes for docking simulations is in preparation. MoSGrid will furthermore foster efficient and collaborative work by providing secure but shareable repositories for validated data, as well as for reusable recipes and workflows.}, note = {DOI:10.1186/1758-2946-3-S1-P14}, keywords = {}, pubstate = {published}, tppubtype = {conference} } The MoSGrid (Molecular Simulation Grid) project aims to provide remote computational chemistry services within the German Grid Initiative (D-Grid). Submission and monitoring of compute jobs, as well as the retrieval of postprocessed results are realized through a web based portal. The use of standardized portlets and a generally modular approach allows for the simultaneous and independent implementation of frontends for different molecular simulation codes. To date, functional prototypes of portlets for applications from the quantum chemical and the molecular dynamics domain are available, being represented by Gaussian and Gromacs, respectively. The implementation of other quantum chemical codes, as requested by the community, and of codes for docking simulations is in preparation. MoSGrid will furthermore foster efficient and collaborative work by providing secure but shareable repositories for validated data, as well as for reusable recipes and workflows. |
Wörmann, K; Lucio, M; Forcisi, S; Schmitt-Kopplin, P; Kenar, E; Kohlbacher, O; Franken, H; Rosenbaum, L; Zell, A; Lehmann, R; Häring, HU Verbund "BIOMARKERS" - Metabolomics im BMBF Kompetenznetz Diabetes Diabetes, Stoffwechsel und Herz, 20 (3), pp. 178-184, 2011. @article{BIOMARKERS, title = {Verbund "BIOMARKERS" - Metabolomics im BMBF Kompetenznetz Diabetes}, author = {K Wörmann and M Lucio and S Forcisi and P Schmitt-Kopplin and E Kenar and O Kohlbacher and H Franken and L Rosenbaum and A Zell and R Lehmann and HU Häring}, year = {2011}, date = {2011-01-01}, journal = {Diabetes, Stoffwechsel und Herz}, volume = {20}, number = {3}, pages = {178-184}, abstract = {„Krankheiten befallen uns nicht aus heiterem Himmel, sondern entwickeln sich aus täglichen Sünden wider die Natur. Wenn sich diese gehäuft haben, brechen sie unversehens hervor“. Vor dem Hintergrund des epidemisch sich ausbreitenden Typ 2 Diabetes und der Jahre bis Jahrzehnte vorausgehenden prä-diabetischen Phase bei scheinbar gesunden Menschen ist dieses Zitat von Hippokrates aktueller denn je. Deutschland hat nach Angaben der Internationalen Diabetes-Föderation aus dem Jahre 2010 die höchste Diabetesprävalenz in Europa (1). In Deutschland gibt es derzeit ca. 8 Millionen Diabetes-Patienten. Die Anzahl der sogenannten Prä-Diabetiker, d.h. Individuen, bei denen aufgrund eines bereits veränderten Metabolismus die Glukosetoleranz gestört ist, beträgt nochmals ca. 8 Millionen. Berücksichtigt man hierbei noch Menschen mit Insulinresistenz, erhöht sich die Anzahl der Prä-Diabetiker auf geschätzte 15 Millionen. Diesen Zahlen gegenüber stehen die klinischen Folgen des Krankheitsbildes Diabetes, die von enormer medizinischer und sozio-ökonomischer Relevanz sind. 
Trotzdem gibt es bisher kein einfaches diagnostisches Laborverfahren um den Prä-Diabetes zu diagnostizieren und durch frühzeitige Intervention die Manifestation des Diabetes zu verzögern oder gar zu verhindern.}, keywords = {}, pubstate = {published}, tppubtype = {article} } „Krankheiten befallen uns nicht aus heiterem Himmel, sondern entwickeln sich aus täglichen Sünden wider die Natur. Wenn sich diese gehäuft haben, brechen sie unversehens hervor“. Vor dem Hintergrund des epidemisch sich ausbreitenden Typ 2 Diabetes und der Jahre bis Jahrzehnte vorausgehenden prä-diabetischen Phase bei scheinbar gesunden Menschen ist dieses Zitat von Hippokrates aktueller denn je. Deutschland hat nach Angaben der Internationalen Diabetes-Föderation aus dem Jahre 2010 die höchste Diabetesprävalenz in Europa (1). In Deutschland gibt es derzeit ca. 8 Millionen Diabetes-Patienten. Die Anzahl der sogenannten Prä-Diabetiker, d.h. Individuen, bei denen aufgrund eines bereits veränderten Metabolismus die Glukosetoleranz gestört ist, beträgt nochmals ca. 8 Millionen. Berücksichtigt man hierbei noch Menschen mit Insulinresistenz, erhöht sich die Anzahl der Prä-Diabetiker auf geschätzte 15 Millionen. Diesen Zahlen gegenüber stehen die klinischen Folgen des Krankheitsbildes Diabetes, die von enormer medizinischer und sozio-ökonomischer Relevanz sind. Trotzdem gibt es bisher kein einfaches diagnostisches Laborverfahren um den Prä-Diabetes zu diagnostizieren und durch frühzeitige Intervention die Manifestation des Diabetes zu verzögern oder gar zu verhindern. |
Toussaint, Nora C; Maman, Yaakov; Kohlbacher, Oliver; Louzoun, Yoram Universal peptide vaccines – optimal peptide vaccine based on viral sequence conservation Vaccine, 29 (47), pp. 8745-8753, 2011. @article{vaccine_toussaint_2011, title = {Universal peptide vaccines – optimal peptide vaccine based on viral sequence conservation}, author = {Nora C Toussaint and Yaakov Maman and Oliver Kohlbacher and Yoram Louzoun}, doi = {https://doi.org/10.1016/j.vaccine.2011.07.132}, year = {2011}, date = {2011-01-01}, journal = {Vaccine}, volume = {29}, number = {47}, pages = {8745-8753}, abstract = {Rapidly mutating viruses such as the hepatitis C virus (HCV), the human immunodeficiency virus (HIV), or influenza viruses (Flu) call for highly effective universal peptide vaccines, i.e. vaccines that do not only yield broad population coverage but also broad coverage of various viral strains. The efficacy of such vaccines is determined by multiple properties of the epitopes they comprise. Beyond the specific properties of each epitope, properties of the corresponding source antigens are of great importance. If a response is mounted against viral proteins with a low copy number within the cell or against proteins expressed very late, this response may fail to induce lysis of the infected cells before budding can take place. We here propose a novel methodology to optimize the epitope composition and position in order to induce a maximal protection. In order for a peptide vaccine to yield the best possible universal protection, several conditions should be met: (a) an optimal choice of target antigens, (b) an optimal choice of highly conserved epitopes, (c) maximal coverage of the target population, and (d) the proper ordering of the epitopes in the final vaccine to ensure favorable cleavage. We propose a mathematical formalism for epitope selection and ordering that balances the constraints imposed by these different conditions. 
Focusing on HCV, HIV, and Flu, we show that not all of the conditions can be fulfilled for all viruses. For each virus, different constraints are harder to maintain: In Flu, the conservation constraint is breached first, while in HIV, the targeting of optimal proteins is difficult to achieve. The proposed methodology can be applied to any virus to assess the feasibility of optimally combining the above-mentioned constraints.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Rapidly mutating viruses such as the hepatitis C virus (HCV), the human immunodeficiency virus (HIV), or influenza viruses (Flu) call for highly effective universal peptide vaccines, i.e. vaccines that do not only yield broad population coverage but also broad coverage of various viral strains. The efficacy of such vaccines is determined by multiple properties of the epitopes they comprise. Beyond the specific properties of each epitope, properties of the corresponding source antigens are of great importance. If a response is mounted against viral proteins with a low copy number within the cell or against proteins expressed very late, this response may fail to induce lysis of the infected cells before budding can take place. We here propose a novel methodology to optimize the epitope composition and position in order to induce a maximal protection. In order for a peptide vaccine to yield the best possible universal protection, several conditions should be met: (a) an optimal choice of target antigens, (b) an optimal choice of highly conserved epitopes, (c) maximal coverage of the target population, and (d) the proper ordering of the epitopes in the final vaccine to ensure favorable cleavage. We propose a mathematical formalism for epitope selection and ordering that balances the constraints imposed by these different conditions. Focusing on HCV, HIV, and Flu, we show that not all of the conditions can be fulfilled for all viruses. 
For each virus, different constraints are harder to maintain: In Flu, the conservation constraint is breached first, while in HIV, the targeting of optimal proteins is difficult to achieve. The proposed methodology can be applied to any virus to assess the feasibility of optimally combining the above-mentioned constraints. |
Zhang, GL; Ansari, HR; Bradley, P; Cawley, GC; Hertz, T; Hu, X; Jojic, N; Kim, Y; Kohlbacher, O; Lund, O; Lundegaard, C; Magaret, CA; Nielsen, M; Papadopoulos, H; Raghava, GP; Tal, VS; Xue, L; Yanover, C; Zhu, S; Rock, MT; Crowe, JE Jr; Polycarpou, MM; Duch, W; Brusic, V Machine learning competition in immunology - Prediction of HLA class I molecules J. Immunol. Meth., 375 (1-2), pp. 1-4, 2011. @article{ICIWComp, title = {Machine learning competition in immunology - Prediction of HLA class I molecules}, author = {GL Zhang and HR Ansari and P Bradley and GC Cawley and T Hertz and X Hu and N Jojic and Y Kim and O Kohlbacher and O Lund and C Lundegaard and CA Magaret and M Nielsen and H Papadopoulos and GP Raghava and VS Tal and L Xue and C Yanover and S Zhu and MT Rock and JE Jr Crowe and MM Polycarpou and W Duch and V Brusic}, doi = {10.1016/j.jim.2011.09.010}, year = {2011}, date = {2011-01-01}, journal = {J. Immunol. Meth.}, volume = {375}, number = {1-2}, pages = {1--4}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
Tung, Chun-Wei; Ziehm, Matthias; Kämper, Andreas; Kohlbacher, Oliver; Ho, Shinn-Ying POPISK: T-cell reactivity prediction using support vector machines and string kernels BMC Bioinformatics, 12 , pp. 446, 2011. @article{POPI-SK, title = {POPISK: T-cell reactivity prediction using support vector machines and string kernels}, author = {Chun-Wei Tung and Matthias Ziehm and Andreas Kämper and Oliver Kohlbacher and Shinn-Ying Ho}, doi = {10.1186/1471-2105-12-446}, year = {2011}, date = {2011-01-01}, journal = {BMC Bioinformatics}, volume = {12}, pages = {446}, abstract = {Background Accurate prediction of peptide immunogenicity and characterization of relation between peptide sequences and peptide immunogenicity will be greatly helpful for vaccine designs and understanding of the immune system. In contrast to the prediction of antigen processing and presentation pathway, the prediction of subsequent T-cell reactivity is a much harder topic. Previous studies of identifying T-cell receptor (TCR) recognition positions were based on small-scale analyses using only a few peptides and concluded different recognition positions such as positions 4, 6 and 8 of peptides with length 9. Large-scale analyses are necessary to better characterize the effect of peptide sequence variations on T-cell reactivity and design predictors of a peptide's T-cell reactivity (and thus immunogenicity). 
Subsequently, a computational method (named POPISK) using support vector machine with a weighted degree string kernel is proposed to predict T-cell reactivity and identify important recognition positions. POPISK yields a mean 10-fold cross-validation accuracy of 68% in predicting T-cell reactivity of HLA-A2-binding peptides. POPISK capable of predicting immunogenicity with scores can also correctly predict the change in T-cell reactivity related to point mutations in epitopes reported in previous studies using crystal structures. Thorough analyses of the prediction results identify the important positions 4, 6, 8 and 9, and yield insights into the molecular basis for TCR recognition. Finally, we relate this finding to physicochemical properties and structural features of the MHC-peptide-TCR interaction. Conclusions A computational method POPISK is proposed to predict immunogenicity with scores which are useful for predicting immunogenicity changes made by single-residue modifications. The web server of POPISK is freely available at http://iclab.life.nctu.edu.tw/POPISK.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Background Accurate prediction of peptide immunogenicity and characterization of relation between peptide sequences and peptide immunogenicity will be greatly helpful for vaccine designs and understanding of the immune system. In contrast to the prediction of antigen processing and presentation pathway, the prediction of subsequent T-cell reactivity is a much harder topic. Previous studies of identifying T-cell receptor (TCR) recognition positions were based on small-scale analyses using only a few peptides and concluded different recognition positions such as positions 4, 6 and 8 of peptides with length 9. Large-scale analyses are necessary to better characterize the effect of peptide sequence variations on T-cell reactivity and design predictors of a peptide's T-cell reactivity (and thus immunogenicity). 
The identification and characterization of important positions influencing T-cell reactivity will provide insights into the underlying mechanism of immunogenicity. Results This work establishes a large dataset by collecting immunogenicity data from three major immunology databases. In order to consider the effect of MHC restriction, peptides are classified by their associated MHC alleles. Subsequently, a computational method (named POPISK) using support vector machine with a weighted degree string kernel is proposed to predict T-cell reactivity and identify important recognition positions. POPISK yields a mean 10-fold cross-validation accuracy of 68% in predicting T-cell reactivity of HLA-A2-binding peptides. POPISK capable of predicting immunogenicity with scores can also correctly predict the change in T-cell reactivity related to point mutations in epitopes reported in previous studies using crystal structures. Thorough analyses of the prediction results identify the important positions 4, 6, 8 and 9, and yield insights into the molecular basis for TCR recognition. Finally, we relate this finding to physicochemical properties and structural features of the MHC-peptide-TCR interaction. Conclusions A computational method POPISK is proposed to predict immunogenicity with scores which are useful for predicting immunogenicity changes made by single-residue modifications. The web server of POPISK is freely available at http://iclab.life.nctu.edu.tw/POPISK. |
Albrecht, Mario; Kerren, Andreas; Klein, Karsten; Kohlbacher, Oliver; Mutzel, Petra; Paul, Wolfgang; Schreiber, Falk; Wybrow, Michael On Open Problems in Biological Network Visualization Springer Berlin Heidelberg, 2010. @inproceedings{OnOpenProblemsinBiologicalNetworkVisualization, title = {On Open Problems in Biological Network Visualization}, author = {Mario Albrecht and Andreas Kerren and Karsten Klein and Oliver Kohlbacher and Petra Mutzel and Wolfgang Paul and Falk Schreiber and Michael Wybrow}, editor = {David Eppstein and Emden R Gansner}, url = {https://link.springer.com/chapter/10.1007/978-3-642-11805-0_25}, year = {2010}, date = {2010-01-01}, booktitle = {Graph Drawing}, pages = {256-267}, publisher = {Springer Berlin Heidelberg}, abstract = {Much of the data generated and analyzed in the life sciences can be interpreted and represented by networks or graphs. Network analysis and visualization methods help in investigating them, and many universal as well as special-purpose tools and libraries are available for this task. However, the two fields of graph drawing and network biology are still largely disconnected. Hence, visualization of biological networks does typically not apply state-of-the-art graph drawing techniques, and graph drawing tools do not respect the drawing conventions of the life science community.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Much of the data generated and analyzed in the life sciences can be interpreted and represented by networks or graphs. Network analysis and visualization methods help in investigating them, and many universal as well as special-purpose tools and libraries are available for this task. However, the two fields of graph drawing and network biology are still largely disconnected. Hence, visualization of biological networks does typically not apply state-of-the-art graph drawing techniques, and graph drawing tools do not respect the drawing conventions of the life science community. |
Trusch, Maria; Böhlick, Alexandra; Hildebrand, Diana; Lichtner, Björn; Bertsch, Andreas; Kohlbacher, Oliver; Bachmann, Sebastian; Schlüter, Hartmut Application of displacement chromatography for the analysis of a lipid raft proteome J. Chromatogr. B, 878 (3-4), pp. 309-314, 2010. @article{DC_JChrB2010, title = {Application of displacement chromatography for the analysis of a lipid raft proteome}, author = {Maria Trusch and Alexandra Böhlick and Diana Hildebrand and Björn Lichtner and Andreas Bertsch and Oliver Kohlbacher and Sebastian Bachmann and Hartmut Schlüter}, doi = {https://doi.org/10.1016/j.jchromb.2009.11.035}, year = {2010}, date = {2010-01-01}, journal = {J. Chromatogr. B}, volume = {878}, number = {3-4}, pages = {309-314}, abstract = {Defining membrane proteomes is fundamental to understand the role of membrane proteins in biological processes and to find new targets for drug development. Usually multidimensional chromatography using step or gradient elution is applied for the separation of tryptic peptides of membrane proteins prior to their mass spectrometric analysis. Displacement chromatography (DC) offers several advantages that are helpful for proteome analysis. However, DC has so far been applied for proteomic investigations only in few cases. In this study we therefore applied DC in a multidimensional LC-MS approach for the separation and identification of membrane proteins located in cholesterol-enriched membrane microdomains (lipid rafts) obtained from rat kidney by density gradient centrifugation. The tryptic peptides were separated on a cation-exchange column in the displacement mode with spermine used as displacer. Fractions obtained from DC were analyzed using an HPLC-chip system coupled to an electrospray-ionization ion-trap mass spectrometer. 
This procedure yielded more than 400 highly significant peptide spectrum matches and led to the identification of more than 140 reliable protein hits within an established rat kidney lipid raft proteome. The majority of identified proteins were membrane proteins. In sum, our results demonstrate that DC is a suitable alternative to gradient elution separations for the identification of proteins via a multidimensional LC-MS approach.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Defining membrane proteomes is fundamental to understand the role of membrane proteins in biological processes and to find new targets for drug development. Usually multidimensional chromatography using step or gradient elution is applied for the separation of tryptic peptides of membrane proteins prior to their mass spectrometric analysis. Displacement chromatography (DC) offers several advantages that are helpful for proteome analysis. However, DC has so far been applied for proteomic investigations only in few cases. In this study we therefore applied DC in a multidimensional LC-MS approach for the separation and identification of membrane proteins located in cholesterol-enriched membrane microdomains (lipid rafts) obtained from rat kidney by density gradient centrifugation. The tryptic peptides were separated on a cation-exchange column in the displacement mode with spermine used as displacer. Fractions obtained from DC were analyzed using an HPLC-chip system coupled to an electrospray-ionization ion-trap mass spectrometer. This procedure yielded more than 400 highly significant peptide spectrum matches and led to the identification of more than 140 reliable protein hits within an established rat kidney lipid raft proteome. The majority of identified proteins were membrane proteins. In sum, our results demonstrate that DC is a suitable alternative to gradient elution separations for the identification of proteins via a multidimensional LC-MS approach. |
Kirchler, Tobias; Briesemeister, Sebastian; Singer, Miriam; Schütze, Katja; Keinath, Melanie; Kohlbacher, Oliver; Vicente-Carbajosa, Jesus; Teige, Markus; Harter, Klaus; Chaban, Christina The role of phosphorylatable serine residues in the DNA-binding domain of Arabidopsis bZIP transcription factors Eur. J. Cell Biol., 89 (2-3), pp. 175-183, 2010. @article{bZip, title = {The role of phosphorylatable serine residues in the DNA-binding domain of Arabidopsis bZIP transcription factors}, author = {Tobias Kirchler and Sebastian Briesemeister and Miriam Singer and Katja Schütze and Melanie Keinath and Oliver Kohlbacher and Jesus Vicente-Carbajosa and Markus Teige and Klaus Harter and Christina Chaban}, doi = {https://doi.org/10.1016/j.ejcb.2009.11.023}, year = {2010}, date = {2010-01-01}, journal = {Eur. J. Cell Biol.}, volume = {89}, number = {2-3}, pages = {175-183}, abstract = {Reversible phosphorylation plays a crucial role in regulating the activity of enzymes and other proteins in all living organisms. Particularly, the phosphorylation of transcription factors can modulate their capability to regulate downstream target genes. In plants, basic domain-containing leucine-zipper (bZIP) transcription factors have an important function in the regulation of many developmental processes and adaptive responses to the environment. By a comprehensive sequence analysis, we identified a set of highly conserved, potentially phospho-accepting serines within the DNA-binding domain of plant bZIPs. Structural modelling revealed that these serines are in physical contact with the DNA and predicts that their phosphorylation will have a major influence on the DNA-binding activity of plant bZIPs. In support of this, we show, by means of a quantitative in vitro binding assay, that phosphorylation-mimicking substitutions of some of these serines strongly interfere with the DNA binding of two prototypical Arabidopsis bZIPs, namely AtZIP63 and HY5. 
Our data suggest that the identified serines could serve as in vivo targets for kinases and phosphatases, allowing the fine-tuning of bZIP factor activity at the DNA-protein interaction level.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Reversible phosphorylation plays a crucial role in regulating the activity of enzymes and other proteins in all living organisms. Particularly, the phosphorylation of transcription factors can modulate their capability to regulate downstream target genes. In plants, basic domain-containing leucine-zipper (bZIP) transcription factors have an important function in the regulation of many developmental processes and adaptive responses to the environment. By a comprehensive sequence analysis, we identified a set of highly conserved, potentially phospho-accepting serines within the DNA-binding domain of plant bZIPs. Structural modelling revealed that these serines are in physical contact with the DNA and predicts that their phosphorylation will have a major influence on the DNA-binding activity of plant bZIPs. In support of this, we show, by means of a quantitative in vitro binding assay, that phosphorylation-mimicking substitutions of some of these serines strongly interfere with the DNA binding of two prototypical Arabidopsis bZIPs, namely AtZIP63 and HY5. Our data suggest that the identified serines could serve as in vivo targets for kinases and phosphatases, allowing the fine-tuning of bZIP factor activity at the DNA-protein interaction level. |
Röttig, Marc; Rausch, Christian; Kohlbacher, Oliver Combining Structure and Sequence Information Allows Automated Prediction of Substrate Specificities within Enzyme Families PLoS Comput. Biol., 6 (1), pp. e1000636, 2010. @article{ASC_PLoSCB, title = {Combining Structure and Sequence Information Allows Automated Prediction of Substrate Specificities within Enzyme Families}, author = {Marc Röttig and Christian Rausch and Oliver Kohlbacher}, url = {http://www.ploscompbiol.org/article/info:doi/10.1371/journal.pcbi.1000636}, year = {2010}, date = {2010-01-01}, journal = {PLoS Comput. Biol.}, volume = {6}, number = {1}, pages = {e1000636}, abstract = {An important aspect of the functional annotation of enzymes is not only the type of reaction catalysed by an enzyme, but also the substrate specificity, which can vary widely within the same family. In many cases, prediction of family membership and even substrate specificity is possible from enzyme sequence alone, using a nearest neighbour classification rule. However, the combination of structural information and sequence information can improve the interpretability and accuracy of predictive models. The method presented here, Active Site Classification (ASC), automatically extracts the residues lining the active site from one representative three-dimensional structure and the corresponding residues from sequences of other members of the family. From a set of representatives with known substrate specificity, a Support Vector Machine (SVM) can then learn a model of substrate specificity. Applied to a sequence of unknown specificity, the SVM can then predict the most likely substrate. The models can also be analysed to reveal the underlying structural reasons determining substrate specificities and thus yield valuable insights into mechanisms of enzyme specificity. 
We illustrate the high prediction accuracy achieved on two benchmark data sets and the structural insights gained from ASC by a detailed analysis of the family of decarboxylating dehydrogenases. The ASC web service is available at http://asc.informatik.uni-tuebingen.de/.}, keywords = {}, pubstate = {published}, tppubtype = {article} } An important aspect of the functional annotation of enzymes is not only the type of reaction catalysed by an enzyme, but also the substrate specificity, which can vary widely within the same family. In many cases, prediction of family membership and even substrate specificity is possible from enzyme sequence alone, using a nearest neighbour classification rule. However, the combination of structural information and sequence information can improve the interpretability and accuracy of predictive models. The method presented here, Active Site Classification (ASC), automatically extracts the residues lining the active site from one representative three-dimensional structure and the corresponding residues from sequences of other members of the family. From a set of representatives with known substrate specificity, a Support Vector Machine (SVM) can then learn a model of substrate specificity. Applied to a sequence of unknown specificity, the SVM can then predict the most likely substrate. The models can also be analysed to reveal the underlying structural reasons determining substrate specificities and thus yield valuable insights into mechanisms of enzyme specificity. We illustrate the high prediction accuracy achieved on two benchmark data sets and the structural insights gained from ASC by a detailed analysis of the family of decarboxylating dehydrogenases. The ASC web service is available at http://asc.informatik.uni-tuebingen.de/. |
Gehlenborg, Nils; O'Donoghue, Sean I; Baliga, Nitin S; Goesmann, Alexander; Hibbs, Matthew A; Kitano, Hiroaki; Kohlbacher, Oliver; Neuweger, Heiko; Schneider, Reinhard; Tenenbaum, Dan; Gavin, Anne-Claude Visualization of omics data for systems biology Nat. Methods, 7 , pp. S56-68, 2010. @article{VizBiReviewNatureMethods, title = {Visualization of omics data for systems biology}, author = {Nils Gehlenborg and Sean I O'Donoghue and Nitin S Baliga and Alexander Goesmann and Matthew A Hibbs and Hiroaki Kitano and Oliver Kohlbacher and Heiko Neuweger and Reinhard Schneider and Dan Tenenbaum and Anne-Claude Gavin}, doi = {https://doi.org/10.1038/nmeth.1436}, year = {2010}, date = {2010-01-01}, journal = {Nat. Methods}, volume = {7}, pages = {S56-68}, abstract = {High-throughput studies of biological systems are rapidly accumulating a wealth of ‘omics’-scale data. Visualization is a key aspect of both the analysis and understanding of these data and users now have many visualization methods and tools to choose from. The challenge is to create clear, meaningful, and integrated visualizations that give biological insight, without being overwhelmed by the intrinsic complexity of the data. In this review, we discuss how visualization tools are being used to help interpret protein interaction, gene expression, and metabolic profile data, and we highlight emerging new directions.}, keywords = {}, pubstate = {published}, tppubtype = {article} } High-throughput studies of biological systems are rapidly accumulating a wealth of ‘omics’-scale data. Visualization is a key aspect of both the analysis and understanding of these data and users now have many visualization methods and tools to choose from. The challenge is to create clear, meaningful, and integrated visualizations that give biological insight, without being overwhelmed by the intrinsic complexity of the data. 
In this review, we discuss how visualization tools are being used to help interpret protein interaction, gene expression, and metabolic profile data, and we highlight emerging new directions. |
Bertsch, Andreas; Jung, Stephan; Zerck, Alexandra; Pfeifer, Nico; Nahnsen, Sven; Henneges, Carsten; Nordheim, Alfred; Kohlbacher, Oliver Optimal de novo design of MRM experiments for rapid assay development in targeted proteomics J. Proteome Res., 9 (5), pp. 2696-2704, 2010. @article{MRM-Scheduling-JPR, title = {Optimal de novo design of MRM experiments for rapid assay development in targeted proteomics}, author = {Andreas Bertsch and Stephan Jung and Alexandra Zerck and Nico Pfeifer and Sven Nahnsen and Carsten Henneges and Alfred Nordheim and Oliver Kohlbacher}, url = {http://pubs.acs.org/doi/abs/10.1021/pr1001803}, year = {2010}, date = {2010-01-01}, journal = {J. Proteome Res.}, volume = {9}, number = {5}, pages = {2696-2704}, abstract = {Targeted proteomic approaches such as multiple reaction monitoring (MRM) overcome problems associated with classical shotgun mass spectrometry experiments. Developing MRM quantitation assays can be time-consuming, because relevant peptide representatives of the proteins must be found and their retention time and the product ions must be determined. Given the transitions, hundreds to thousands of them can be scheduled into one experiment run. However, it is difficult to select which of the transitions should be included into a measurement. We present a novel algorithm that allows the construction of MRM assays from the sequence of the targeted proteins alone. This enables the rapid development of targeted MRM experiments without large libraries of transitions or peptide spectra. The approach relies on combinatorial optimization in combination with machine learning techniques to predict proteotypicity, retention time and fragmentation of peptides. The resulting potential transitions are scheduled optimally by solving an integer linear program. 
We demonstrate that fully automated construction of MRM experiments from protein sequences alone is possible and over 80% coverage of the targeted proteins can be achieved without further optimization of the assay.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Targeted proteomic approaches such as multiple reaction monitoring (MRM) overcome problems associated with classical shotgun mass spectrometry experiments. Developing MRM quantitation assays can be time-consuming, because relevant peptide representatives of the proteins must be found and their retention time and the product ions must be determined. Given the transitions, hundreds to thousands of them can be scheduled into one experiment run. However, it is difficult to select which of the transitions should be included into a measurement. We present a novel algorithm that allows the construction of MRM assays from the sequence of the targeted proteins alone. This enables the rapid development of targeted MRM experiments without large libraries of transitions or peptide spectra. The approach relies on combinatorial optimization in combination with machine learning techniques to predict proteotypicity, retention time and fragmentation of peptides. The resulting potential transitions are scheduled optimally by solving an integer linear program. We demonstrate that fully automated construction of MRM experiments from protein sequences alone is possible and over 80% coverage of the targeted proteins can be achieved without further optimization of the assay. |