@unpublished{KhosraviSheikhKhozaniCooper, author = {Khosravi, Khabat and Sheikh Khozani, Zohreh and Cooper, James R.}, title = {Predicting stable gravel-bed river hydraulic geometry: A test of novel, advanced, hybrid data mining algorithms}, volume = {2021}, doi = {10.25643/bauhaus-universitaet.4499}, url = {http://nbn-resolving.de/urn:nbn:de:gbv:wim2-20211004-44998}, abstract = {Accurate prediction of stable alluvial hydraulic geometry, in which erosion and sedimentation are in equilibrium, is one of the most difficult but critical topics in the field of river engineering. Data mining algorithms have been gaining more attention in this field due to their high performance and flexibility. However, an understanding of the potential for these algorithms to provide fast, cheap, and accurate predictions of hydraulic geometry is lacking. This study provides the first quantification of this potential. Using at-a-station field data, predictions of flow depth, water-surface width and longitudinal water-surface slope are made using three standalone data mining techniques - Instance-based Learning (IBK), KStar, and Locally Weighted Learning (LWL) - along with four types of novel hybrid algorithms in which the standalone models are trained with Vote, Attribute Selected Classifier (ASC), Regression by Discretization (RBD), and Cross-validation Parameter Selection (CVPS) algorithms (Vote-IBK, Vote-KStar, Vote-LWL, ASC-IBK, ASC-KStar, ASC-LWL, RBD-IBK, RBD-KStar, RBD-LWL, CVPS-IBK, CVPS-KStar, CVPS-LWL). Through a comparison of their predictive performance and a sensitivity analysis of the driving variables, the results reveal: (1) Shields stress was the most effective parameter in the prediction of all geometry dimensions; (2) hybrid models had a higher predictive power than standalone data mining models, empirical equations and traditional machine learning algorithms; (3) the Vote-KStar model had the highest performance in predicting depth and width, and ASC-KStar in estimating slope, each providing very good predictive performance. Through these algorithms, the hydraulic geometry of any river can potentially be predicted accurately and with ease using just a few readily available flow and channel parameters. Thus, the results reveal that these models have great potential for use in stable channel design in data-poor catchments, especially in developing nations where technical modelling skills and understanding of the hydraulic and sediment processes occurring in the river system may be lacking.}, subject = {Maschinelles Lernen}, language = {en} } @unpublished{MosaviTorabiHashemietal., author = {Mosavi, Amir and Torabi, Mehrnoosh and Hashemi, Sattar and Saybani, Mahmoud Reza and Shamshirband, Shahaboddin}, title = {A Hybrid Clustering and Classification Technique for Forecasting Short-Term Energy Consumption}, doi = {10.25643/bauhaus-universitaet.3755}, url = {http://nbn-resolving.de/urn:nbn:de:gbv:wim2-20180907-37550}, abstract = {Electrical energy distribution companies in Iran have to announce their energy demand at least three days ahead of the market opening. Therefore, an accurate load estimation is highly crucial. This research invoked a methodology based on CRISP data mining and used SVM, ANN, and CBA-ANN-SVM (a novel hybrid model of clustering with both of the widely used ANN and SVM) to predict the short-term electrical energy demand of Bandarabbas. In previous studies, researchers had introduced only a few effective parameters and achieved no reasonable error for Bandarabbas power consumption.
In this research we tried to recognize all efficient parameters, and with the use of the CBA-ANN-SVM model the rate of error has been minimized. After consulting with experts in the field of power consumption and plotting daily power consumption for each week, this research showed that official holidays and weekends have an impact on power consumption. When the weather gets warmer, the consumption of electrical energy increases due to the use of electrical air conditioners. Also, consumption patterns in warm and cold months are different. Analyzing the power consumption of the same month across different years showed high similarity in power consumption patterns. Factors with a high impact on power consumption were identified, and statistical methods were utilized to prove their impact. Models were then built using SVM, ANN and CBA-ANN-SVM. Since the proposed method (CBA-ANN-SVM) has a low MAPE of 1.474 (4 clusters) and 1.297 (3 clusters) in comparison with SVM (MAPE = 2.015) and ANN (MAPE = 1.790), this model was selected as the final model. The final model combines the benefits of both models with the benefits of clustering. By discovering the structure of the data, a clustering algorithm divides the data into several clusters based on the similarities and differences between them. Because the data inside each cluster are more similar to each other than to the data set as a whole, modeling each cluster separately will present better results. For future research, we suggest using fuzzy methods, genetic algorithms, or a hybrid of both to forecast each cluster. It is also possible to use fuzzy methods, genetic algorithms, or a hybrid of both without clustering. It is expected that such models will produce better and more accurate results. This paper presents a hybrid approach to predict the electric energy usage of weather-sensitive loads. The presented method utilizes the clustering paradigm along with ANN and SVM approaches for accurate short-term prediction of electric energy usage, using weather data. Since the methodology invoked in this research is based on CRISP data mining, data preparation has received a great deal of attention in this research. Once data pre-processing was done, the underlying pattern of electric energy consumption was extracted by means of machine learning methods to precisely forecast short-term energy consumption. The proposed approach (CBA-ANN-SVM) was applied to real load data and resulted in higher accuracy compared to the existing models. © 2018 American Institute of Chemical Engineers Environ Prog, 2018. https://doi.org/10.1002/ep.12934}, subject = {Data Mining}, language = {en} } @phdthesis{Anderka, author = {Anderka, Maik}, title = {Analyzing and Predicting Quality Flaws in User-generated Content: The Case of Wikipedia}, doi = {10.25643/bauhaus-universitaet.1977}, url = {http://nbn-resolving.de/urn:nbn:de:gbv:wim2-20130709-19778}, school = {Bauhaus-Universit{\"a}t Weimar}, abstract = {Web applications that are based on user-generated content are often criticized for containing low-quality information; a popular example is the online encyclopedia Wikipedia. The major points of criticism pertain to the accuracy, neutrality, and reliability of information. The identification of low-quality information is an important task, since for a huge number of people around the world it has become a habit to visit Wikipedia first in case of an information need.
Existing research on quality assessment in Wikipedia either investigates only small samples of articles or else deals with classifying content as high quality or low quality. This thesis goes further: it targets the investigation of quality flaws, thus providing specific indications of the respects in which low-quality content needs improvement. The original contributions of this thesis, which relate to the fields of user-generated content analysis, data mining, and machine learning, can be summarized as follows: (1) We propose the investigation of quality flaws in Wikipedia based on user-defined cleanup tags. Cleanup tags are commonly used in the Wikipedia community to tag content that has some shortcomings. Our approach is based on the hypothesis that each cleanup tag defines a particular quality flaw. (2) We provide the first comprehensive breakdown of Wikipedia's quality flaw structure. We present a flaw organization schema, and we conduct an extensive exploratory data analysis which reveals (a) the flaws that actually exist, (b) the distribution of flaws in Wikipedia, and (c) the extent of flawed content. (3) We present the first breakdown of Wikipedia's quality flaw evolution. We consider the entire history of the English Wikipedia from 2001 to 2012, which comprises more than 508 million page revisions, summing up to 7.9 TB. Our analysis reveals (a) how the incidence and the extent of flaws have evolved, and (b) how the handling and the perception of flaws have changed over time. (4) We are the first to operationalize an algorithmic prediction of quality flaws in Wikipedia. We cast quality flaw prediction as a one-class classification problem, develop a tailored quality flaw model, and employ a dedicated one-class machine learning approach. A comprehensive evaluation based on human-labeled Wikipedia articles underlines the practical applicability of our approach.}, subject = {Data Mining}, language = {en} }