% Doctoral thesis record. Field values are verbatim; the abstract is kept on a single line.
@phdthesis{Kiesel2022,
  author   = {Kiesel, Johannes},
  title    = {Harnessing Web Archives to Tackle Selected Societal Challenges},
  school   = {Bauhaus-Universit{\"a}t Weimar},
  year     = {2022},
  doi      = {10.25643/bauhaus-universitaet.4660},
  url      = {http://nbn-resolving.de/urn:nbn:de:gbv:wim2-20220622-46602},
  language = {en},
  subject  = {Informatik},
  abstract = {With the growing importance of the World Wide Web, the major challenges our society faces are also increasingly affecting the digital areas of our lives. Some of the associated problems can be addressed by computer science, and some of these specifically by data-driven research. To do so, however, requires to solve open issues related to archive quality and the large volume and variety of the data contained. This dissertation contributes data, algorithms, and concepts towards leveraging the big data and temporal provenance capabilities of web archives to tackle societal challenges. We selected three such challenges that highlight the central issues of archive quality, data volume, and data variety, respectively: (1) For the preservation of digital culture, this thesis investigates and improves the automatic quality assurance of the web page archiving process, as well as the further processing of the resulting archive data for automatic analysis. (2) For the critical assessment of information, this thesis examines large datasets of Wikipedia and news articles and presents new methods for automatically determining quality and bias. (3) For digital security and privacy, this thesis exploits the variety of content on the web to quantify the security of mnemonic passwords and analyzes the privacy-aware re-finding of the various seen content through private web archives.},
}