% Conference paper (IEEE BIBM 2024) presenting GMG, a genetic-algorithm,
% MDL-based genome sequence compressor. The citation key is the exporter's
% opaque identifier and is kept as-is so existing citations keep resolving.
% The address field holds only the country given by the original export;
% NOTE(review): replace with the publisher's city once confirmed.
@inproceedings{1a709bb669c644b8970fedfa67254433,
  author    = {Nawaz, {M. Zohaib} and Nawaz, {M. Saqib} and Fournier-Viger, Philippe and Tseng, {Vincent S.}},
  title     = {An {MDL}-Based Genetic Algorithm for Genome Sequence Compression},
  booktitle = {Proceedings - 2024 {IEEE} International Conference on Bioinformatics and Biomedicine, {BIBM} 2024},
  editor    = {Cannataro, Mario and Zheng, Huiru and Gao, Lin and Cheng, Jianlin and {de Miranda}, {Joao Luis} and Zumpano, Ester and Hu, Xiaohua and Cho, Young-Rae and Park, Taesung},
  pages     = {6724--6731},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2024},
  doi       = {10.1109/BIBM62325.2024.10821914},
  language  = {English},
  keywords  = {Crossover, GA, Genome sequences, MDL, Mutation},
  abstract  = {The exponential growth of genomic data has posed significant challenges for lossless compression of genome sequences. While recent reference-free genome compressors have shown promising results, they often fail to fully leverage the inherent sequential structure of genome sequences, require substantial computational resources and lack (or have limited) interpretability. This paper presents a novel genome compression method that employs the Minimum Description Length (MDL) principle, which is based on the idea that the best model for a given dataset is the one that provides the shortest description of that dataset. The proposed compressor, called GMG (Genetic algorithm for MDL-based Genome compression), integrates a genetic algorithm to identify optimal k-mers (patterns) in a model to best compress the genome data. Experimental results across various datasets demonstrate that GMG outperforms state-of-the-art genome compressors in terms of bits-per-base compression and computational efficiency. Furthermore, it is demonstrated that the optimal patterns identified by GMG for compression can also be utilized for genome classification, offering a multifunctional advantage over previous compressors. GMG is freely available at github.com/MuhammadzohaibNawaz/GMG},
  note      = {Publisher Copyright: {\textcopyright} 2024 IEEE. 2024 IEEE International Conference on Bioinformatics and Biomedicine, BIBM 2024. Conference date: 03-12-2024 through 06-12-2024},
}