@article{eskandari2021osfs,
  author    = {Eskandari, Sadegh},
  title     = {Online Streaming Feature Selection Using Geometric Series of the Adjacency Matrix of Features},
  abstract  = {Feature selection (FS) is an important pre-processing step in machine learning and data mining. Traditional feature selection methods assume that the entire feature space is available from the beginning. However, online streaming features (OSF) are an integral part of many real-world applications. In OSF, the number of training examples is fixed while the number of features grows with time as new features stream in. For instance, in the problem of semantic segmentation of images using texture-based features, the number of features can grow indefinitely. In these dynamically growing scenarios, a rudimentary approach is to wait a long time for all features to become available and then carry out feature selection. However, because optimal decisions matter at every time step, a more rational approach is to design an online streaming feature selection (OSFS) method that selects the best feature subset from the information seen so far and updates the subset on the fly when new features stream in. Any OSFS method must satisfy three critical conditions: first, it should not require any domain knowledge about the feature space, because the full feature space is unknown or inaccessible; second, it should allow efficient incremental updates of the selected features; third, it should be as accurate as possible at each time instance, to allow reliable classification and learning at that instance. In this paper, OSFS is approached through the geometric series of the feature adjacency matrix, and a new OSFS algorithm called OSFS-GS is proposed. This algorithm ranks features based on path integrals and the centrality concept on an online feature adjacency graph. The most appealing characteristics of the proposed algorithm are: (1) all possible subsets of features are considered in evaluating the rank of a given feature; (2) it is extremely efficient, as it reduces the feature ranking problem to simply calculating the geometric series of an adjacency matrix; and (3) besides the selected feature subset, it maintains a redundant feature subset that allows good features to be reconsidered at later time instances. The algorithm is compared with three state-of-the-art OSFS algorithms, namely information-investing, fast-OSFS and OSFSMI. The information-investing algorithm is an embedded online feature selection method that treats feature selection as part of the learning process; it selects a new incoming feature if the feature reduces the model entropy by more than the cost of coding it. The fast-OSFS algorithm is a filter method that gradually generates a Markov blanket of the feature space using causality-based measures; for any new incoming feature, it executes two processes, an online relevance analysis followed by an online redundancy analysis. OSFSMI is similar to fast-OSFS but uses information theory for feature analysis. The algorithms are extensively evaluated on eight high-dimensional datasets in terms of compactness, classification accuracy and run-time. To simulate the OSF scenario, features are presented one by one, and to strengthen the comparison, the results are averaged over 30 random streaming orders. Experimental results demonstrate that the OSFS-GS algorithm achieves better accuracy than the three existing OSFS algorithms.},
  keywords  = {Streaming Features, Feature Selection, Geometric Series},
  volume    = {17},
  number    = {4},
  pages     = {3--14},
  publisher = {Research Center on Developing Advanced Technologies},
  doi       = {10.29252/jsdp.17.4.3},
  url       = {http://jsdp.rcisp.ac.ir/article-1-942-en.html},
  eprint    = {http://jsdp.rcisp.ac.ir/article-1-942-en.pdf},
  journal   = {Signal and Data Processing},
  issn      = {2538-4201},
  eissn     = {2538-421X},
  year      = {2021}
}
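% The abstract above reduces feature ranking to the geometric series of a feature
% adjacency matrix, i.e. an aggregate over walks of all lengths on the feature graph.
% Below is a minimal sketch of that closed form, assuming a correlation-based adjacency;
% the damping factor and all names are illustrative, not the authors' exact OSFS-GS procedure.

import numpy as np

def geometric_series_scores(A, damping=0.9):
    """Closed form of sum_{k>=1} (aA)^k = (I - aA)^{-1} - I over the feature graph."""
    n = A.shape[0]
    a = damping / max(np.abs(np.linalg.eigvals(A)).max(), 1e-12)  # keep the series convergent
    S = np.linalg.inv(np.eye(n) - a * A) - np.eye(n)              # aggregates walks of every length
    return S.sum(axis=0)                                          # one centrality score per feature

rng = np.random.default_rng(0)
A = np.abs(np.corrcoef(rng.random((8, 100))))   # toy adjacency over 8 features seen so far
np.fill_diagonal(A, 0.0)
print(np.argsort(geometric_series_scores(A))[-3:])   # indices of the 3 most central features
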
@article{riazi2021steganography,
  author    = {Riazi, Ladan and Pourebrahimi, Alireza and Alborzi, Mahmood and Radfar, Reza},
  title     = {A hybrid method to improve Steganography in JPEG images using metaheuristic algorithms},
  abstract  = {This paper presents a method for improving steganography and enhancing its security using combined metaheuristic algorithms. The goal is to achieve an improved PSNR value in order to preserve image quality in the steganography process. To insert the message signal into the host data, steganography algorithms make small, message-dependent changes in the host data so that they are not visible to the human eye. Every steganographic algorithm has two steps: embedding the stego signal and extracting it. The stego signal can be embedded in the spatial domain or in a transform domain, and extraction can be done using correlation with the original watermark or independently of it; clearly, the choice of embedding method and the way of extraction are interdependent. In spatial techniques, information is stored directly in the pixel color intensities, whereas in the transform domain the image is first converted to another domain (such as frequency) and the information is then embedded in the transform coefficients. Optimization based on metaheuristic algorithms is widely used in this field and has encouraged many researchers; given a suitable fitness function, such methods are useful in the design of steganography algorithms. In this research, seven commonly used metaheuristic algorithms, namely ant colony optimization, the bee algorithm, cuckoo search, the genetic algorithm, particle swarm optimization, simulated annealing and the firefly algorithm, were selected, and the performance of each algorithm was evaluated individually on the data. Among the applied algorithms, cuckoo search, firefly and the bee algorithm, which achieved the best fitness values and therefore the highest quality, were selected. All six orderings of combining these three algorithms were examined separately. The best combination is firefly, bee and cuckoo search, which provides a mean PSNR of 54.89. Compared to the individual ant colony, bee, cuckoo search, genetic, particle swarm optimization, simulated annealing and firefly algorithms, the proposed combination provides improvements of 59.29%, 29.61%, 37.43%, 52.56%, 54.84%, 57.82% and 3.82% in the PSNR value, respectively.},
  keywords  = {steganography, metaheuristic algorithms, firefly algorithm, bee algorithm, cuckoo search algorithm},
  volume    = {17},
  number    = {4},
  pages     = {15--32},
  publisher = {Research Center on Developing Advanced Technologies},
  doi       = {10.29252/jsdp.17.4.15},
  url       = {http://jsdp.rcisp.ac.ir/article-1-936-en.html},
  eprint    = {http://jsdp.rcisp.ac.ir/article-1-936-en.pdf},
  journal   = {Signal and Data Processing},
  issn      = {2538-4201},
  eissn     = {2538-421X},
  year      = {2021}
}
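% All seven metaheuristics above are steered by an image-quality fitness, PSNR. A
% minimal sketch of that standard measure follows; embed() and the parameter search
% are hypothetical placeholders, not code from the paper.

import numpy as np

def psnr(cover, stego, peak=255.0):
    """Peak signal-to-noise ratio (dB) between the cover and stego images."""
    mse = np.mean((cover.astype(np.float64) - stego.astype(np.float64)) ** 2)
    return float("inf") if mse == 0 else 10.0 * np.log10(peak ** 2 / mse)

# A metaheuristic would search for embedding parameters maximizing
# psnr(cover, embed(cover, message, params)).
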
@article{khojasteh2021drift,
  author    = {Khojasteh, Fatemeh and Kahani, Mohsen and Behkamal, Behshid},
  title     = {Concept drift detection in business process logs using deep learning},
  abstract  = {Process mining provides a bridge between process modeling and analysis on the one hand and data mining on the other. Process mining aims at discovering, monitoring, and improving real processes by extracting knowledge from event logs. However, since most business processes change over time (e.g., due to new legislation or seasonal effects), traditional process mining techniques cannot capture such “second-order dynamics” and analyze these processes as if they were in steady state. Such changes can significantly impact the performance of processes; hence, for process management it is crucial that changes in processes be discovered and analyzed. Process change detection is also known as business process drift detection. All existing methods for process drift detection depend on the size of the windows used for detecting changes, and identifying suitable features that characterize the relations between traces or events is another challenge for most methods. In this paper, we propose an automated and window-independent approach for detecting sudden business process drifts by introducing the notion of trace embedding. Using trace embedding makes it possible to extract all features from the relations between traces automatically. We show that the proposed approach outperforms all existing methods, with significantly higher accuracy and lower detection delay.},
  keywords  = {process mining, concept drifts, process changes, word embedding},
  volume    = {17},
  number    = {4},
  pages     = {33--48},
  publisher = {Research Center on Developing Advanced Technologies},
  doi       = {10.29252/jsdp.17.4.33},
  url       = {http://jsdp.rcisp.ac.ir/article-1-912-en.html},
  eprint    = {http://jsdp.rcisp.ac.ir/article-1-912-en.pdf},
  journal   = {Signal and Data Processing},
  issn      = {2538-4201},
  eissn     = {2538-421X},
  year      = {2021}
}
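% "Trace embedding" above carries word-embedding ideas over to process traces. One
% plausible realization is sketched here purely as an assumption: treat each trace's
% activity sequence as a document and learn doc2vec-style vectors with the gensim
% library; the activity names and hyperparameters are invented, not the paper's setup.

from gensim.models.doc2vec import Doc2Vec, TaggedDocument

traces = [["register", "check", "approve"], ["register", "check", "reject"]]  # toy event log
docs = [TaggedDocument(words=t, tags=[i]) for i, t in enumerate(traces)]
model = Doc2Vec(docs, vector_size=16, min_count=1, epochs=50)

vec = model.infer_vector(["register", "check", "approve"])  # vector for a new trace
# A drift detector could then flag shifts in the distribution of trace vectors.
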
@article{abbasi2021ifsb,
  author    = {Abbasi, Zeinab and Rahmani, Mohsen and Ghaffarian, Hossein},
  title     = {IFSB-ReliefF: A New Instance and Feature Selection Algorithm Based on ReliefF},
  abstract  = {The increasing use of the Internet and of phenomena such as sensor networks has led to an enormous growth in the volume of information. Though this information has many benefits, it creates problems, such as the need for more storage space and faster processors, and calls for data refinement to remove unnecessary data. Data reduction methods provide ways to select useful data from a large amount of duplicate, incomplete and redundant data, and they are often applied in the pre-processing phase of machine learning algorithms. Three types of data reduction can be applied to data: (1) feature reduction, (2) instance reduction, and (3) discretization of feature values. In this paper, a new algorithm based on ReliefF is introduced to reduce both instances and features. The proposed algorithm can run on nominal and numeric features and on data sets with missing values. In addition, the selection of instances from each class is proportional to the prior probability of the classes. The proposed algorithm can run in parallel on a multi-core CPU, which decreases the runtime significantly and gives it the ability to run on big data sets. One type of instance reduction is instance selection. There are many issues in designing instance selection algorithms, such as how to represent the reduced set, how to build a subset of instances, how to choose a distance function, how to evaluate the designed reduction algorithm, the size of the reduced data set, and how to determine the critical and border instances. A subset of instances can be created in three ways: incrementally, decrementally, or in batch mode; in this paper, we use the batch approach. Another important issue is measuring the similarity of instances with a distance function; we use the Jaccard index and the Manhattan distance. Deciding how many and which instances should be removed and which must remain is another important issue. The goal of this paper is to reduce the size of the stored set of instances while maintaining the quality of the data set, so we remove highly similar and non-border instances according to the specified reduction rate. The other type of data reduction performed in our algorithm is feature selection. Feature selection methods fall into three categories: wrapper, filter, and hybrid methods. Many feature selection algorithms have been introduced, and they can be grouped according to several criteria; for example, based on how they search for the optimal feature subset, they can be categorized as exponential, sequential, or random search methods. The usefulness and relevance of a feature or feature subset is assessed by evaluation measures based on metrics such as distance, accuracy, consistency and information. ReliefF is a feature selection algorithm that calculates a weight for each feature and ranks the features; this paper, however, uses ReliefF to rank both instances and features. The algorithm works as follows: first, the nearest neighbors of each instance are found; then, based on the evaluation function, a weight is calculated for each instance and feature; finally, the features and instances with higher weights are retained and the rest are eliminated. The IFSB-ReliefF (Instance and Feature Selection Based on ReliefF) algorithm is tested on two data sets, and the C4.5 algorithm then classifies the reduced data. Finally, the results obtained from classifying the reduced data sets are compared with the results of several instance selection and feature selection algorithms run separately.},
  keywords  = {data reduction, instance selection, feature selection, ReliefF},
  volume    = {17},
  number    = {4},
  pages     = {49--66},
  publisher = {Research Center on Developing Advanced Technologies},
  doi       = {10.29252/jsdp.17.4.49},
  url       = {http://jsdp.rcisp.ac.ir/article-1-902-en.html},
  eprint    = {http://jsdp.rcisp.ac.ir/article-1-902-en.pdf},
  journal   = {Signal and Data Processing},
  issn      = {2538-4201},
  eissn     = {2538-421X},
  year      = {2021}
}
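% The ReliefF weighting that IFSB-ReliefF builds on can be sketched compactly. Below is
% a minimal Relief-style weighter for binary classes and numeric features; the paper's
% algorithm additionally weights instances, handles nominal and missing values and runs
% in parallel, none of which is shown, and all names here are illustrative.

import numpy as np

def relief_weights(X, y, n_iters=100, seed=0):
    """Relief weighting for binary classes and numeric features."""
    rng = np.random.default_rng(seed)
    n, d = X.shape
    span = X.max(axis=0) - X.min(axis=0) + 1e-12      # per-feature value ranges
    w = np.zeros(d)
    for _ in range(n_iters):
        i = rng.integers(n)
        diff = np.abs(X - X[i]) / span                # normalized distances to X[i]
        dist = diff.sum(axis=1)
        dist[i] = np.inf                              # never pick the instance itself
        hit = np.argmin(np.where(y == y[i], dist, np.inf))   # nearest same-class neighbor
        miss = np.argmin(np.where(y != y[i], dist, np.inf))  # nearest other-class neighbor
        w += (diff[miss] - diff[hit]) / n_iters       # reward class separation
    return w                                          # higher weight = more relevant feature
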
@article{veisi2021std,
  author    = {Veisi, Hadi and Ghoreishi, Sayed Akbar and Bastanfard, Azam},
  title     = {Spoken Term Detection for Persian News of Islamic Republic of Iran Broadcasting},
  abstract  = {Islamic Republic of Iran Broadcasting (IRIB), as one of the biggest broadcasting organizations, produces thousands of hours of media content daily. Accordingly, IRIB's archive is one of the richest archives in Iran, containing a huge amount of multimedia data, and monitoring, browsing and retrieving over this massive volume of data is a key issue for the broadcaster. The aim of this research is to design a content retrieval engine for IRIB's media productions using spoken term detection (STD), also known as keyword spotting. The goal of an STD system is to search for a set of keywords in a set of speech documents. One approach to STD is to use a speech recognition system, in which speech is recognized and converted into text, and the text is then searched for the keywords. The variety of speech documents and the limited vocabulary of the recognizer are the two challenges of this approach. Large vocabulary continuous speech recognition (LVCSR) systems usually have a large but still limited vocabulary and cannot recognize out-of-vocabulary (OOV) words; therefore, LVCSR-based STD systems suffer from the OOV problem and cannot spot OOV keywords. Methods such as the use of sub-word units (e.g., phonemes or syllables) and proxy words have been introduced to overcome the vocabulary limitation and deal with OOV keywords. This paper proposes a Persian (Farsi) STD system based on speech recognition that uses the proxy words method to deal with OOV keywords; to improve the performance of this method, we use a Long Short-Term Memory-Connectionist Temporal Classification (LSTM-CTC) network. In our experiments, we designed and implemented an LVCSR system for the Farsi language. The Large FarsDat dataset, which contains 80 hours of speech from 100 speakers, is used to train the speech recognition system, and the Kaldi toolkit is used to implement it. Because the dataset is limited, subspace Gaussian mixture models (SGMMs) are used to train the acoustic model; the acoustic model is based on context-dependent triphones, and the language model is a word trigram model. The word error rate (WER) of the speech recognition system is 2.71% on the FarsDat test set and 28.23% on Persian news collected from IRIB data. Term detection is designed based on weighted finite-state transducers (WFSTs). In this method, a speech document is first converted to a lattice by the speech recognizer (the lattice retains the recognizer's full hypothesis probabilities rather than only the most probable path), and the lattice is then converted to a WFST that contains the full word probabilities computed by the recognizer. Text retrieval is then used to index and search over the WFST output. The proxy words method is used to deal with OOV keywords: OOV words are represented by similarly pronounced in-vocabulary words. To improve the performance of the proxy words method, an LSTM-CTC network is proposed. This network is trained on the characters of words in isolation (not on continuous sentences); it recomputes the probabilities and re-verifies the proxy outputs, which improves the proxy words method, since that method suffers from false alarms. Because the LSTM-CTC is an end-to-end network trained on characters, it does not need a phonetic lexicon and can support OOV words, and because it is trained on isolated words, it reduces the weight of the language model and emphasizes the acoustic model. The proposed STD system achieves an actual term-weighted value (ATWV) of 0.9206 for in-vocabulary keywords; for OOV keywords, the ATWV is 0.2 using the proxy words method, and applying the proposed LSTM-CTC improves it to 0.3058. On the Persian news dataset, the proposed method achieves an ATWV of 0.8008.},
  keywords  = {Persian Spoken Term Detection, IRIB, Persian News, Keyword Spotting, Speech Recognition, Kaldi},
  volume    = {17},
  number    = {4},
  pages     = {67--88},
  publisher = {Research Center on Developing Advanced Technologies},
  doi       = {10.29252/jsdp.17.4.67},
  url       = {http://jsdp.rcisp.ac.ir/article-1-922-en.html},
  eprint    = {http://jsdp.rcisp.ac.ir/article-1-922-en.pdf},
  journal   = {Signal and Data Processing},
  issn      = {2538-4201},
  eissn     = {2538-421X},
  year      = {2021}
}
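% The ATWV figures quoted above follow the standard NIST term-weighted value. For
% reference, the generic textbook definition is reproduced below (with beta the
% miss/false-alarm trade-off constant); this is the standard metric, not a formula
% taken from the paper itself.

\[
\mathrm{ATWV} = 1 - \frac{1}{|T|} \sum_{t \in T} \left( P_{\mathrm{miss}}(t) + \beta \, P_{\mathrm{FA}}(t) \right),
\qquad
P_{\mathrm{miss}}(t) = 1 - \frac{N_{\mathrm{corr}}(t)}{N_{\mathrm{true}}(t)},
\qquad
P_{\mathrm{FA}}(t) = \frac{N_{\mathrm{spur}}(t)}{T_{\mathrm{speech}} - N_{\mathrm{true}}(t)}
\]
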
@article{pashaei2021stock,
  author    = {Pashaei, Zahra and Dehkharghani, Rahim},
  title     = {Stock Market Modeling Using Artificial Neural Network and Comparison with Classical Linear Models},
  abstract  = {The stock market plays an important role in the world economy. Stock market customers are interested in predicting the general stock market index price, since their income depends on this financial factor; therefore, a reliable forecast of the stock market can be extremely profitable for stockholders. Stock market prediction has been one of the main challenges in forecasting financial time series in recent decades, and it has increasingly attracted researchers from different scientific branches such as computer science, statistics and mathematics. Despite a good deal of research in this area, the achieved success is far from ideal. Due to the intrinsic complexity of financial data in the stock market, designing a practical model for this prediction is a difficult task, and the difficulty increases when a wide variety of financial factors affect the stock market index. In this paper, we investigate this problem and propose an effective model to solve this challenge. Tehran’s stock market has been chosen as a real-world case study for this purpose. Concretely, we train a regression model on several features, such as the first and second market indices over the last five years, as well as other influential features including the US dollar price, global gold price, petroleum price, industry index and floating currency index. Then, we use the trained system to predict the stock market index value of the following day. The proposed approach can be used by stockbrokers (trading companies that buy and sell shares for their clients) to predict the stock market value.},
  keywords  = {ARIMA, ANFIS, ANN, CC, MI, SARIMA, Stock Modelling},
  volume    = {17},
  number    = {4},
  pages     = {89--102},
  publisher = {Research Center on Developing Advanced Technologies},
  doi       = {10.29252/jsdp.17.4.89},
  url       = {http://jsdp.rcisp.ac.ir/article-1-939-en.html},
  eprint    = {http://jsdp.rcisp.ac.ir/article-1-939-en.pdf},
  journal   = {Signal and Data Processing},
  issn      = {2538-4201},
  eissn     = {2538-421X},
  year      = {2021}
}
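% The MI and CC feature-screening step described in this abstract is easy to sketch
% with standard tools. A hedged example follows: combining the two criteria by a simple
% sum is our simplification (the paper applies them as separate criteria), and all
% names here are invented.

import numpy as np
from sklearn.feature_selection import mutual_info_regression

def screen_features(X, y, names, k=5):
    """Rank features by mutual information and absolute correlation with the target."""
    mi = mutual_info_regression(X, y, random_state=0)                          # nonlinear dependence
    cc = np.abs([np.corrcoef(X[:, j], y)[0, 1] for j in range(X.shape[1])])   # linear dependence
    keep = np.argsort(mi + cc)[-k:]   # simple combined score; keep the top-k features
    return [names[j] for j in keep]
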
@article{najafi2021ensemble,
  author    = {Najafi, Fatemeh and Parvin, Hamid and Mirzaei, Kamal and Nejatiyan, Samad and Rezaie, Seyede Vahideh},
  title     = {A new ensemble clustering method based on fuzzy c-means clustering while maintaining diversity in ensemble},
  abstract  = {Ensemble clustering has been one of the main research approaches in data mining, pattern recognition, machine learning and artificial intelligence over the last decade. In ensemble clustering, several base clusterings are first produced, and a function is then used to aggregate them into a final clustering that is as similar as possible to all the base clusterings. The input of this function is the set of all base clusterings and its output is a clustering called the consensus clustering; the function itself is called the agreement (consensus) function. Ensemble clustering has been proposed to increase the efficiency, robustness, reliability and stability of clustering. Because clustering is unsupervised on the one hand, and general-purpose base clustering algorithms are inadequate on the other, ensemble clustering attempts to find an agreed clustering with the highest consensus. In fact, ensemble clustering techniques follow the slogan that a combination of several weaker models is better than one strong model; however, this claim is correct only if certain conditions (such as the diversity of the members in the ensemble and their quality) are met. This article presents an ensemble clustering method that uses the weak clustering method fuzzy c-means as the base clusterer, and the diversity of the ensemble is increased by adopting several measures. The proposed ensemble clustering method keeps the main benefit of fuzzy c-means, its speed, while addressing its major weakness, the inability to detect non-spherical and non-uniform clusters.},
  volume    = {17},
  number    = {4},
  pages     = {103--122},
  publisher = {Research Center on Developing Advanced Technologies},
  doi       = {10.29252/jsdp.17.4.103},
  url       = {http://jsdp.rcisp.ac.ir/article-1-976-en.html},
  eprint    = {http://jsdp.rcisp.ac.ir/article-1-976-en.pdf},
  journal   = {Signal and Data Processing},
  issn      = {2538-4201},
  eissn     = {2538-421X},
  year      = {2021}
}
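% One standard way to realize the agreement function described above is a co-association
% consensus: count how often each pair of samples lands in the same base cluster, then
% cluster that matrix. This is a generic sketch under that assumption, not necessarily
% the paper's exact agreement function; base labels would come from hardened fuzzy
% c-means runs (argmax over memberships).

import numpy as np
from scipy.cluster.hierarchy import linkage, fcluster
from scipy.spatial.distance import squareform

def consensus(base_labels, n_clusters):
    """base_labels: list of 1-D label arrays, one per base clustering."""
    L = np.asarray(base_labels)                           # shape: (n_runs, n_samples)
    co = (L[:, :, None] == L[:, None, :]).mean(axis=0)    # co-association matrix
    Z = linkage(squareform(1.0 - co, checks=False), method="average")
    return fcluster(Z, t=n_clusters, criterion="maxclust")
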
@article{soleimanian2021fuzzy,
  author    = {Soleimanian, Azam and Khazaei, Shahram},
  title     = {Fuzzy retrieval of encrypted data by multi-purpose data-structures},
  abstract  = {The growing amount of information arising from emerging technologies has caused organizations to face challenges in maintaining and managing their information. Expanding hardware and human resources, or outsourcing data management and maintenance to an external organization in the form of cloud storage services, are two common approaches to overcoming these challenges. The first approach imposes costs on the organization and is only a temporary solution; by contrast, the cloud storage approach allows the organization to pay only a small fee for the space actually in use (rather than the total reserved capacity) and to always have access to its data and to management tools with the most up-to-date mechanisms available. Despite the benefits of cloud storage services, security challenges arise because the organization's data is stored and managed outside the organization's supervision. One challenge is protecting the confidentiality of outsourced data. Encrypting the data before outsourcing can overcome this challenge, but common encryption schemes may fail to support various functionalities of the cloud storage service. One of the most widely used functionalities in cloud storage services is secure keyword search over the encrypted document collection. Searchable encryption schemes enable users to securely search over encrypted data, and derivatives of this functionality have recently been considered by researchers based on users' needs. One of these derivatives is ranked search, which allows the server to rank the results based on their similarity to the searched keyword; this functionality reduces the communication overhead between the cloud server and the owner organization, as well as the response time for the search. In this paper, we focus on ranked symmetric searchable encryption schemes. In this regard, we review two data structures proposed in symmetric searchable encryption schemes, the linked list and the lookup table, and show that these data structures have capabilities beyond their original design goal. More precisely, we show that with small modifications to these data structures it is possible to support secure ranked search efficiently. In addition, building on these modified data structures, we present two ranked symmetric searchable encryption schemes, one for single-keyword search and one for Boolean keyword search.},
  keywords  = {Searchable encryption, Ranked search, Linked list, Lookup table, Fuzzy retrieval, Boolean query},
  volume    = {17},
  number    = {4},
  pages     = {123--138},
  publisher = {Research Center on Developing Advanced Technologies},
  doi       = {10.29252/jsdp.17.4.123},
  url       = {http://jsdp.rcisp.ac.ir/article-1-901-en.html},
  eprint    = {http://jsdp.rcisp.ac.ir/article-1-901-en.pdf},
  journal   = {Signal and Data Processing},
  issn      = {2538-4201},
  eissn     = {2538-421X},
  year      = {2021}
}
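% The lookup-table and linked-list structures the paper repurposes can be illustrated
% in plaintext form. This toy sketch shows only the ranked-index idea; a real searchable
% encryption scheme would store both structures in encrypted form, which is not
% attempted here.

from collections import defaultdict

index = defaultdict(list)          # keyword -> list of (doc_id, score): the "linked list"
                                   # the dict itself plays the role of the lookup table

def add_posting(keyword, doc_id, score):
    index[keyword].append((doc_id, score))

def ranked_search(keyword, top_k=3):
    return sorted(index[keyword], key=lambda p: -p[1])[:top_k]

add_posting("cloud", "doc1", 0.9)
add_posting("cloud", "doc2", 0.4)
print(ranked_search("cloud"))      # [('doc1', 0.9), ('doc2', 0.4)]
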
@article{momeny2021pooling,
  author    = {Momeny, Mohammad and Sarram, Mehdi Agha and Latif, AliMohammad and Sheikhpour, Razieh},
  title     = {A Convolutional Neural Network based on Adaptive Pooling for Classification of Noisy Images},
  abstract  = {The convolutional neural network (CNN) is one of the effective methods for classifying images; it learns using convolutional, pooling and fully-connected layers. All kinds of noise disrupt the operation of this network: noisy images reduce classification accuracy and increase training time. Noise is an unwanted signal that corrupts the original signal, changing the output values of a system so that the recorded value differs from the actual one. In the process of image encoding and transmission, when the image passes through a noisy transmission channel, impulse noise with positive and negative pulses corrupts the image: a positive pulse appears as a white pixel and a negative pulse as a black one. The purpose of this paper is to introduce a dynamic pooling layer that makes the convolutional neural network more robust to noisy images. The proposed method classifies noisy images by weighting the values in the dynamic pooling region. In this research, a new method for modifying the pooling operator is presented in order to increase the accuracy of the CNN in noisy image classification. To suppress noise in the dynamic pooling layer, it is sufficient to exclude noisy pixels from processing by the dynamic pooling operator; preventing noisy pixels from being processed keeps noise values from being selected and passed on to subsequent CNN layers, which increases classification accuracy. It is possible for all pixels of an entire window in the image to be corrupted. Because the dynamic pooling operator is applied repeatedly across the layers of the CNN, the proposed handling of noisy pixels takes effect many times. In the proposed dynamic pooling layer, a pixel that has been corrupted by noise with probability p is excluded from the dynamic pooling operation with the same probability; in other words, the participation of a pixel in the dynamic pooling layer depends on the health of that pixel's value. For comparison, trained VGG-Net models with the medium and slow architectures are used; five convolutional layers and three fully-connected layers are the components of the proposed model. With a 26% error rate on images corrupted by impulse noise with a density of 5%, the proposed method performs better than the compared methods. The simulation results show the increased efficiency and speed of the convolutional neural network with the modified dynamic pooling layer for noisy image classification.},
  keywords  = {Convolutional neural network, Noise, Image classification, weighted pooling},
  volume    = {17},
  number    = {4},
  pages     = {139--154},
  publisher = {Research Center on Developing Advanced Technologies},
  doi       = {10.29252/jsdp.17.4.139},
  url       = {http://jsdp.rcisp.ac.ir/article-1-938-en.html},
  eprint    = {http://jsdp.rcisp.ac.ir/article-1-938-en.pdf},
  journal   = {Signal and Data Processing},
  issn      = {2538-4201},
  eissn     = {2538-421X},
  year      = {2021}
}
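% The key idea above, excluding suspected impulse-noise pixels from pooling, can be
% sketched outside any deep learning framework. Illustrative only: the paper's layer
% weights values probabilistically inside the network, while this toy masks
% salt-and-pepper extremes before a plain 2x2 max pool.

import numpy as np

def noise_aware_maxpool(img, noisy_mask, k=2):
    """2x2 max pooling that ignores pixels flagged as likely impulse noise."""
    h, w = img.shape[0] // k * k, img.shape[1] // k * k
    x = img[:h, :w].astype(float)
    x[noisy_mask[:h, :w]] = -np.inf                # masked pixels never win the max
    blocks = x.reshape(h // k, k, w // k, k).swapaxes(1, 2).reshape(h // k, w // k, k * k)
    out = blocks.max(axis=-1)
    return np.where(np.isneginf(out), 0.0, out)    # fully corrupted windows fall back to 0

# For salt-and-pepper noise, suspected pixels can be flagged as intensity extremes:
# noisy_mask = (img == 0) | (img == 255)
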
@article{babaali2021arabic,
  author    = {BabaAli, Bagher and Rekabdar, Babak},
  title     = {Off-line Arabic Handwritten Recognition Using a Novel Hybrid HMM-DNN Model},
  abstract  = {To facilitate the entry of data into the computer and its digitization, automatic recognition of printed texts and manuscripts is a considerable aid to many applications. Research on automatic document recognition started decades ago with the recognition of isolated digits and letters, and today, thanks to advances in machine learning methods, efforts are being made to recognize sequences of handwritten words. Generally, based on the type of text, document recognition is divided into two main categories: printed and handwritten. Due to the limited number of fonts compared with the diversity of handwriting of different writers, recognizing printed text is much easier than recognizing handwritten text; thus, the technology for recognizing printed text has matured and been brought to market as a product. Handwriting recognition is usually done in two ways, online and offline; offline handwriting recognition involves the automated translation of text in image format into letters that can be used in computer and text-processing applications. Most research in the field of handwriting recognition has been conducted on Latin script, and a variety of tools and resources have been gathered for that script. This article focuses on applying the latest methods from the field of speech recognition to the recognition of Arabic handwriting. The task of modeling and recognizing handwritten text is very similar to the task of modeling and recognizing speech; for this reason, the approaches used for speech recognition can be applied, with slight changes, to handwriting recognition. With the expansion of hybrid HMM-DNN approaches and the use of sequence objective functions such as MMI, significant improvements have been made in the accuracy of speech recognition systems. This paper presents a pipeline for offline Arabic handwritten text recognition using the open-source Kaldi toolkit, which is very well known in the speech recognition community, together with the latest hybrid models presented in it and data augmentation techniques. The research has been conducted on the Arabic KHATT database and achieved a 7.32% absolute reduction in the word error rate (WER).},
  keywords  = {Arabic Handwritten Recognition, Deep Neural Networks, Hidden Markov Model, Kaldi Toolkit},
  volume    = {17},
  number    = {4},
  pages     = {155--168},
  publisher = {Research Center on Developing Advanced Technologies},
  doi       = {10.29252/jsdp.17.4.155},
  url       = {http://jsdp.rcisp.ac.ir/article-1-975-en.html},
  eprint    = {http://jsdp.rcisp.ac.ir/article-1-975-en.pdf},
  journal   = {Signal and Data Processing},
  issn      = {2538-4201},
  eissn     = {2538-421X},
  year      = {2021}
}
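% Both this entry and the spoken term detection entry above report word error rate.
% For reference, a standard WER computation via edit distance; this is generic textbook
% code, not code from either paper.

def wer(ref, hyp):
    """Word error rate: (substitutions + deletions + insertions) / reference length."""
    r, h = ref.split(), hyp.split()
    d = [[0] * (len(h) + 1) for _ in range(len(r) + 1)]
    for i in range(len(r) + 1):
        d[i][0] = i
    for j in range(len(h) + 1):
        d[0][j] = j
    for i in range(1, len(r) + 1):
        for j in range(1, len(h) + 1):
            d[i][j] = min(d[i - 1][j] + 1,                       # deletion
                          d[i][j - 1] + 1,                       # insertion
                          d[i - 1][j - 1] + (r[i - 1] != h[j - 1]))  # substitution
    return d[len(r)][len(h)] / max(len(r), 1)

print(wer("the cat sat", "the cat sang"))  # 0.333...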