% Journal article: Bhatia, Aggarwal and Kumar (2020), speech-to-text conversion with a GRU-based RNN.
% Abstract text is kept as published; only HTML markup was removed and percent signs escaped.
@article{bhatia2020speech,
  author   = {Bhatia, Manpreet Singh and Aggarwal, Alok and Kumar, Narendra},
  title    = {Speech-to-Text Conversion Using {GRU} and One Hot Vector Encodings},
  journal  = {PalArch's Journal of Archaeology of Egypt / Egyptology},
  volume   = {17},
  number   = {9},
  pages    = {8513--8524},
  year     = {2020},
  month    = dec,
  url      = {https://www.archives.palarch.nl/index.php/jae/article/view/5796},
  abstract = {In this work an RNN based model with the gated recurrent unit (GRU)
              embedding is proposed to convert a raw speech audio into the speech.
              The method involves clearing of noise if any present in the audio and
              then extracting the speech from the audio and converting it to the
              text. Data collection is done manually by creating audio clips from
              microphone and taking samples of audio clips at a sampled frequency of
              44100 Hertz with a sampling size of 1375. The proposed model is able
              to convert into text from various speakers with the different accents
              (acoustics). The input voice of the speaker can be from single speaker
              or multiple speakers and it is compared with the datasets of the voice
              of multiple speakers in the model. Proposed approach with GRU and RNN
              model produces good results having 87\% accuracy on the test set which
              is better than the approaches like Char2Wav with 85\% accuracy, GST
              with 82\%, Listen, attend and spell with 85.9\% and CTC attention
              based model with 85.4\% accuracy. Though compared to Deep Voice 2 with
              Tecatron and RNN with EESEN based WFST accuracy of the proposed
              approach is somewhat lower level which gives accuracy of 93\% and 90\%
              respectively compared to accuracy of the proposed approach which is
              87\%.},
}