Prototype using the web APIs for speech

This is a test that will demonstrate how the standard web APIs can be used to recognise voice from audio input and to read out text.

Speech Recognition API test

Speech Synthesis API test

Read more here:

  1. SpeechSynthesis API documentation
  2. SpeechRecognition API documentation
  3. Can I use: SpeechRecognition API

Code

This code was generated for me by ChatGPT. Ironically, I believe the SpeechSynthesis code is actually my own StackOverflow code from when I was creating my Google Docs Add-on Ready Steady Spell.

<!DOCTYPE html>
<html>
<head>
  <title>Speech Recognition and Synthesis</title>
  <style>
    button {
      font-size: 18px;
    }
    button:disabled {
      opacity: 0.5;
      cursor: not-allowed;
    }
  </style>
</head>
<body>
  <h1>Prototype using the web APIs for speech</h1>
  <p>This is a test that will demonstrate how the standard web APIs can be used to recognise voice from audio input
 and to read out text.</p>
  <ul>
    <li><a href="https://developer.mozilla.org/en-US/docs/Web/API/SpeechSynthesis">SpeechSynthesis API documentation</a></li>
    <li><a href="https://developer.mozilla.org/en-US/docs/Web/API/SpeechRecognition">SpeechRecognition API documentation</a></li>
    <li><a href="https://caniuse.com/speech-recognition">Can I use: SpeechRecognition API</a></li>
  </ul>

  <h2>Speech Recognition API test</h2>
  <button id="startButton">Start Recording</button>
  <button id="stopButton" disabled>Stop Recording</button>
  <div id="result" style="margin-top: 20px;font-size: 18px;"></div>

  <h2>Speech Synthesis API test</h2>
  <form id="textToSpeechForm">
    <label for="textToSpeechInput">Text:</label>
    <textarea id="textToSpeechInput"></textarea>
    <label for="voiceSelect">Voice:</label>
    <select id="voiceSelect"></select>
    <button type="submit">Speak</button>
  </form>

  <script>
    // Check browser support for SpeechRecognition and SpeechSynthesis APIs
    if ('SpeechRecognition' in window || 'webkitSpeechRecognition' in window) {
      // Use the unprefixed constructor when present, otherwise the webkit-prefixed one.
      const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;

      // Page elements the handlers below read from and write to.
      const [resultDiv, textToSpeechForm, textToSpeechInput, voiceSelect] = [
        'result',
        'textToSpeechForm',
        'textToSpeechInput',
        'voiceSelect',
      ].map((id) => document.getElementById(id));

      // Speech engines: one recogniser instance and the browser's synthesiser.
      const { speechSynthesis: synthesis } = window;
      const recognition = new SpeechRecognition();
      recognition.continuous = true;

      // Text from results already marked final; appended to by the onresult handler.
      let finalTranscript = '';

      /**
       * Event handler for when speech is recognised.
       *
       * Walks the results from `event.resultIndex` onward, appending final results to the
       * shared `finalTranscript` and collecting non-final ones into a local interim string,
       * then re-renders the result area as "Recognised text: <final + interim>".
       *
       * @param {SpeechRecognitionEvent} event - the recognition result event.
       */
      recognition.onresult = function(event) {
        let interimTranscript = '';
        for (let i = event.resultIndex; i < event.results.length; i++) {
          const result = event.results[i];
          const transcript = result[0].transcript;
          if (result.isFinal) {
            finalTranscript += transcript;
          } else {
            interimTranscript += transcript;
          }
        }

        // Build the output with DOM nodes rather than innerHTML so the recognised text is
        // always shown literally and can never be parsed as markup.
        const label = document.createElement('strong');
        label.textContent = 'Recognised text:';
        resultDiv.textContent = '';
        resultDiv.appendChild(label);
        resultDiv.appendChild(document.createTextNode(` ${finalTranscript}${interimTranscript}`));
      };

      // Start button: begin recognition, then flip the buttons so only "Stop" is clickable.
      document.getElementById('startButton').onclick = () => {
        recognition.start();
        const startBtn = document.getElementById('startButton');
        const stopBtn = document.getElementById('stopButton');
        startBtn.disabled = true;
        stopBtn.disabled = false;
      };

      // Stop button: end recognition, then flip the buttons so only "Start" is clickable.
      document.getElementById('stopButton').onclick = () => {
        recognition.stop();
        const startBtn = document.getElementById('startButton');
        const stopBtn = document.getElementById('stopButton');
        startBtn.disabled = false;
        stopBtn.disabled = true;
      };

      // Form submission: suppress the default submit and speak the textarea contents
      // using whichever voice is currently chosen in the dropdown.
      textToSpeechForm.onsubmit = (submitEvent) => {
        submitEvent.preventDefault();
        const chosenVoiceName = voiceSelect.value;
        speak(textToSpeechInput.value, chosenVoiceName);
      };

      /**
       * Speak `text` through the synthesiser.
       *
       * @param {string} text - the text to read out.
       * @param {string} voice - name of the voice to use; when falsy the utterance's
       *   voice is left untouched. When non-empty, the utterance's voice is set to the
       *   first voice from `synthesis.getVoices()` whose `name` matches (or to
       *   `undefined` if none matches).
       */
      function speak(text, voice) {
        const utterance = new SpeechSynthesisUtterance(text);
        if (voice) {
          utterance.voice = synthesis
            .getVoices()
            .find((candidate) => candidate.name === voice);
        }
        synthesis.speak(utterance);
      }

      // Once the page has loaded, fill the voice dropdown with whatever voices are available.
      window.onload = () => {
        populateVoiceList();
      };

      // Re-run the dropdown population whenever the synthesiser reports its voices changed.
      synthesis.onvoiceschanged = () => {
        populateVoiceList();
      };

      /**
       * Rebuild the voice dropdown from the voices `synthesis.getVoices()` currently returns.
       *
       * This is invoked from both the window load handler and the voiceschanged handler,
       * so it must be safe to call repeatedly: existing options are removed before the
       * list is repopulated, otherwise every call would append a duplicate set. The voice
       * that was selected before the rebuild is re-selected if it is still available.
       */
      function populateVoiceList() {
        const previouslySelected = voiceSelect.value;

        // Drop the old options so repeated calls do not accumulate duplicates.
        voiceSelect.textContent = '';

        for (const voice of synthesis.getVoices()) {
          const option = document.createElement('option');
          option.textContent = voice.name;
          if (voice.name === previouslySelected) {
            option.selected = true;
          }
          voiceSelect.appendChild(option);
        }
      }

    } else {
      // Display an error message if the APIs are not supported.
      // `resultDiv` is a block-scoped const declared inside the supported branch, so it is
      // not in scope here; look the element up directly instead of throwing a ReferenceError.
      document.getElementById('result').innerText = 'Speech Recognition and/or Speech Synthesis APIs are not supported in this browser.';
    }
  </script>
</body>
</html>

Leave a Reply

Your email address will not be published. Required fields are marked *

This site uses Akismet to reduce spam. Learn how your comment data is processed.