import React from 'react';
import ReactDOM from 'react-dom';


import awo from './awo.png'; // Tell Webpack this JS file uses this image
import higi from './higi.png';
import mfcc from './mfcc.png';
import asrhmm from './asrhmm.png';
import hmmallnums from './hmmallnums.jpeg';
import hmmglob from './hmmglob.jpeg';
import hmmtwo from './hmmtwo.png';
import w2v2 from './w2v2.jpeg';
// import w2v from './w2v.png';
import asrneural from './asrneural.png';
// import ctc from './ctc.png';
// import ctc1 from './ctc1.png';
import ctc2 from './ctc2.png';
import study1 from './study1.png';
// import study2 from './study2.png';
import study2c from './study2c.png';
import myw2v from './wav2vec2.png';




import {
  FlexBox,
  Heading,
  UnorderedList,
  ListItem,
  FullScreen,
  Progress,
  Appear,
  Slide,
  Deck,
  Text,
  Grid,
  Box,
  Image,
  Notes,
  Link
} from 'spectacle';

// const formidableLogo =
//   'https://avatars2.githubusercontent.com/u/5078602?s=280&v=4';


// SPECTACLE_CLI_THEME_START
// Single font stack shared by headings and body text.
const DECK_FONT_STACK = '"Open Sans Condensed", Helvetica, Arial, sans-serif';

/** Theme overrides passed to the Spectacle <Deck>. */
const theme = {
  fonts: {
    header: DECK_FONT_STACK,
    text: DECK_FONT_STACK
  }
};
// SPECTACLE_CLI_THEME_END

// SPECTACLE_CLI_TEMPLATE_START
const template = () => (
  <FlexBox
    justifyContent="space-between"
    position="absolute"
    bottom={0}
    width={1}
  >
    <Box padding="0 1em">
      <FullScreen />
    </Box>
    <Box padding="1em">
      <Progress />
    </Box>
  </FlexBox>
);
// SPECTACLE_CLI_TEMPLATE_END

// const SlideFragments = () => (
//   <>
//     <Slide>
//       <Text>This is a slide fragment.</Text>
//     </Slide>
//     <Slide>
//       <Text>This is also a slide fragment.</Text>
//       <Appear>
//         <Text>This item shows up!</Text>
//       </Appear>
//       <Appear>
//         <Text>This item also shows up!</Text>
//       </Appear>
//     </Slide>
//   </>
// );

const Presentation = () => (
  <Deck theme={theme} template={template}>

<Slide
      backgroundColor="tertiary"
      backgroundImage="url(https://source.unsplash.com/lZCHy8PLyyo/1600x900)"
      backgroundOpacity={0.7}
    >
      <FlexBox height="100%" flexDirection="column">
        <Heading margin="0px" color="primary" fontSize="150px">
          <i>State of the Art ASR</i>
        </Heading>
        <Heading margin="0px" color="primary" fontSize="h2">
          Tolúlọpẹ́ Ògúnrẹ̀mí
        </Heading>
        {/* <Heading margin="0px 32px" color="primary" fontSize="h3">
          Where you can write your decks in JSX, Markdown, or MDX!
        </Heading> */}
      </FlexBox>
      <Notes>
        Hello everyone! Today I will be 'Speaking from Practice' about ASR.
        Due to the low-resource languages I work with, I think it's important to cover both 'traditional' and state of the art methods.
      </Notes>
    </Slide>

    <Slide>
      <Heading>We will cover:</Heading>
      <UnorderedList>
        {/* <Appear>
          <ListItem>What is ASR?</ListItem>
        </Appear> */}
        <Appear>
          <ListItem>What is Speech?</ListItem>
        </Appear>
        <Appear>
          <ListItem>
            What is Text?
          </ListItem>
        </Appear>
        <Appear>
          <ListItem>
            How do we go from Speech to Text?
            <ul>
              <ListItem>
                Hidden Markov Model ASR
              </ListItem>

              <ListItem>
                State of the Art Methods - wav2vec 2.0
              </ListItem>

            </ul>

          </ListItem>
        </Appear>
        
        {/* <Appear>
        <ListItem>
          Zero-resource Code switching           
        </ListItem>
        </Appear> */}

        <Appear>
          <ListItem>
            How to get started with ASR!
          </ListItem>
        </Appear>


      </UnorderedList>
    </Slide>


    <Slide>

    <Heading fontSize="h3">What is Speech?</Heading>


      <Grid
        flex={1}
        gridTemplateColumns="50% 50%"
        gridTemplateRows="100% 100%"
        height="100%"
      >
        <FlexBox alignItems="center" justifyContent="center">
        <iframe width="90%" height="100%" src="https://www.youtube.com/embed/ANHvP6v3DYE" title="Awo Praat" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
        </FlexBox>
        <FlexBox alignItems="center" justifyContent="center">
        <iframe width="90%" height="100%" src="https://www.youtube.com/embed/RI2Pgh8Jfkk" title="Higi haga Praat" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>        
        </FlexBox>
      </Grid>
    </Slide>



  <Slide>

    <Heading fontSize="h3">What is Speech?</Heading>

      <Grid
        flex={1}
        gridTemplateColumns="100% 100%"
        gridTemplateRows="50% 50%"
        height="80%"
      >
        <FlexBox alignItems="center" justifyContent="center">
          {/* <Heading>This is a 4x4 Grid</Heading> */}
          <img width="75%" height="90%" src={awo} alt="Awo Praat" />
        </FlexBox>
        {/* <FlexBox alignItems="center" justifyContent="center">
          <Text textAlign="center">
            With all the content aligned and justified center.
          </Text>
        </FlexBox> */}
        <FlexBox alignItems="center" justifyContent="center">
          {/* <Text textAlign="center">
            It uses Spectacle <CodeSpan>{'<Grid />'}</CodeSpan> and{' '}
            <CodeSpan>{'<FlexBox />'}</CodeSpan> components.
          </Text> */}
        </FlexBox>
        <FlexBox alignItems="center" justifyContent="center">
        <img width="75%" height="90%" src={higi} alt="Awo Praat" />
        </FlexBox>
      </Grid>
    </Slide>


    
    {/* <MarkdownSlide>
      {`
        # What is Speech?

        ![alt text](./mfcc.png "MFCCS")


        `}
    </MarkdownSlide> */}





<Slide>

  <Heading fontSize="h3"> What is Speech?</Heading>

  <Grid
    flex={1}
    gridTemplateColumns="100%"
    gridTemplateRows="80% 20%"
    height="70%"
    >
    <Box style={{display: 'flex', justifyContent: 'center'}}>
    <Image width="85%" height="100%" src={mfcc} alt="Mfcc creation" />
    </Box>
    <Box>
    <Text fontSize="14px" textAlign="center" style={{padding: 0}}>Image from <Link target="_blank" fontSize="14px" href="https://jonathan-hui.medium.com/speech-recognition-asr-model-training-90ed50d93615">source</Link></Text>       
    <Text fontSize="28px" textAlign="center">Until recently, speech was primarily acoustic features in the from of MFCCs. </Text>       
    </Box>
  </Grid>

  <Notes>
    <UnorderedList>
      <ListItem fontSize="16px"> The sound waves that we produce when speaking have a frequency (number of vibrations per second), measured in Hertz </ListItem>
      <ListItem fontSize="16px"> Digital audio signals are measurements of amplitude of a sound wave in time - samples </ListItem>
      <ListItem fontSize="16px"> The number of samples per second is called the sampling rate </ListItem>
      <ListItem fontSize="16px"> We perform a Fast Fourier Transform to produces a spectrum of frequencies </ListItem>
      <ListItem fontSize="16px"> To liken the spectrum to human perception of pitch, the frequency spectrum is mapped to the Mel scale </ListItem>
      <ListItem fontSize="16px"> Due to some methods requiring decorrelated features, a discrete cosine transform is taken</ListItem>
      <ListItem fontSize="16px"> The amplitudes and frequencies of the resulting spectrum are called MFCCs.</ListItem>
   </UnorderedList>
  </Notes>

</Slide>



<Slide>
        <Heading fontSize="h3"> What is Text?</Heading>

        <UnorderedList>
        <Appear>
          <ListItem>Graphemes</ListItem>
        </Appear>
        <Appear>
          <ListItem>Pronunciation models</ListItem>
        </Appear>
        <Appear>
          <ListItem>
            Language models
          </ListItem>
        </Appear>
        <Appear>
          <ListItem>
            Whole words
          </ListItem>
        </Appear>

      </UnorderedList>

      <Notes>
        <UnorderedList>
          <ListItem fontSize="16px"> Grapheme/Characters are used often in modern techniques </ListItem>
          <ListItem fontSize="16px"> Pronunciation models can help model pronunciation across language varieties</ListItem>
          <ListItem fontSize="16px"> Language models help decide the most likely word in context. We do this as humans! </ListItem>
          <ListItem fontSize="16px"> Very small models can predict whole words, like a digit recogniser. </ListItem>
      </UnorderedList>
    </Notes>

</Slide>








<Slide>

  <Heading fontSize="h3"> How do we go from Speech to Text? (HMM)</Heading>

  <Grid
    flex={1}
    gridTemplateColumns="100%"
    gridTemplateRows="80% 20%"
    height="70%"
    >
    <Box style={{display: 'flex', justifyContent: 'center'}}>
    <Image width="85%" height="100%" src={asrhmm} alt="ASR Process" />
    </Box>
    <Box>
    <Text fontSize="14px" textAlign="center" style={{padding: 0}}>Image from <Link target="_blank" fontSize="14px" href="https://web.stanford.edu/class/archive/cs/cs224n/cs224n.1174/lectures/cs224n-2017-lecture12.pdf">CS224N Lecture</Link></Text>       
    {/* <Text fontSize="28px" textAlign="center">Until recently, speech was primarily acoustic features in the from of MFCCs. </Text>        */}
    </Box>
  </Grid>

  <Notes>
        <UnorderedList>
          <ListItem fontSize="16px"> HMM-based speech recognition can be divided into several subtasks </ListItem>
          <ListItem fontSize="16px"> We've already covered the speech processing that is required to obtain acoustic features</ListItem>
          <ListItem fontSize="16px"> The acoustic features are used to make an acoustic model, a model of how the speech features are mapped to phones </ListItem>
          <ListItem fontSize="16px"> Once phones are predicted, they are sequentially mapped whole words with pronunciation models  </ListItem>
          <ListItem fontSize="16px"> The language model helps decide whether the predicted words are likely in context. </ListItem>
      </UnorderedList>
    </Notes>

</Slide>



<Slide>

  <Heading fontSize="h3"> How do we go from Speech to Text? (HMM)</Heading>

  <Grid
    flex={1}
    gridTemplateColumns="100%"
    gridTemplateRows="80% 20%"
    height="70%"
    >
    <Box style={{display: 'flex', justifyContent: 'center'}}>
    <Image width="85%" height="100%" src={hmmglob} alt="ASR Process" />
    </Box>
    <Box>
    <Text fontSize="14px" textAlign="center" style={{padding: 0}}>Image from <Link target="_blank" fontSize="14px" href="https://jonathan-hui.medium.com/speech-recognition-asr-model-training-90ed50d93615">source</Link></Text>       
    {/* <Text fontSize="28px" textAlign="center">Until recently, speech was primarily acoustic features in the from of MFCCs. </Text>        */}
    </Box>
  </Grid>

  <Notes>
        <UnorderedList>
          <ListItem fontSize="16px"> Here is a linear diagram of the process.</ListItem>
          <ListItem fontSize="16px"> We can see that we have three HMM states per phone, this is common across diagrams. It allows for varying lengths of pronunciation of phones.</ListItem>

      </UnorderedList>
  </Notes>

</Slide>




<Slide>

  <Heading fontSize="h3"> How do we go from Speech to Text? (HMM)</Heading>

  <Grid
    flex={1}
    gridTemplateColumns="100%"
    gridTemplateRows="80% 20%"
    height="70%"
    >
    <Box style={{display: 'flex', justifyContent: 'center'}}>
    <Image width="85%" height="100%" src={hmmallnums} alt="ASR Process" />
    </Box>
    <Box>
    <Text fontSize="14px" textAlign="center" style={{padding: 0}}>Image from <Link target="_blank" fontSize="14px" href="https://jonathan-hui.medium.com/speech-recognition-asr-model-training-90ed50d93615">source</Link></Text>       
    <Text fontSize="28px" textAlign="center">Digit Recogniser Diagram </Text>       
    </Box>
  </Grid>

  <Notes>
        <UnorderedList>
          <ListItem fontSize="16px"> Imagine you're in a situation where you'd like jot down someone's phone number but don't have a pen or paper! You can build a smaller ASR model to capture it. </ListItem>
          <ListItem fontSize="16px"> On the left we have each HMM for individual words, on the right you can see them all combined into one model</ListItem>
          <ListItem fontSize="16px"> The arrows depict probabilities. When entering the HMM with an acoustic feature, the arrow with the highest probaility will be followed </ListItem>
          <ListItem fontSize="16px"> Once you get to states for individual phones, there is an arrow going to the same state, this allows for varying phone length</ListItem>
          <ListItem fontSize="16px"> The arrow toward the final state emits the word prediction (a number)</ListItem>
          <ListItem fontSize="16px"> This can be looped to allow multiple predictions. </ListItem>
      </UnorderedList>
  </Notes>

</Slide>



<Slide>

  <Heading fontSize="h3"> How do we go from Speech to Text? (HMM)</Heading>

  <Grid
    flex={1}
    gridTemplateColumns="100%"
    gridTemplateRows="80% 20%"
    height="70%"
    >
    <Box style={{display: 'flex', justifyContent: 'center'}}>
    <Image width="85%" height="100%" src={hmmtwo} alt="ASR Process" />
    </Box>
    <Box>
    <Text fontSize="14px" textAlign="center" style={{padding: 0}}>Image from <Link target="_blank" fontSize="14px" href="https://github.com/DeuroIO/Stanford-CS-224S-Speech-Recognition/blob/master/slides/224s.17.lec3%E8%BF%99%E4%B8%AA.pdf">CS224S Lecture</Link></Text>       
    {/* <Text fontSize="28px" textAlign="center">Until recently, speech was primarily acoustic features in the from of MFCCs. </Text>        */}
    </Box>
  </Grid>


  <Notes>
        <UnorderedList>
          <ListItem fontSize="16px"> Here is a example showing the three states per phone and conditional probabilities of going from one number to the next  </ListItem>
          <ListItem fontSize="16px"> Coartculation is where articulation of phones depends on the neigbouring phones, so different probabilities for different word combinations can help that. </ListItem>
          <ListItem fontSize="16px"> There's no need for a language model in the digit recogniser case, but in a language-wide case, the predicted words would be subject to a language model. </ListItem>
          <ListItem fontSize="16px">  </ListItem>
      </UnorderedList>
  </Notes>


</Slide>

<Slide>

  <Heading fontSize="h3"> How do we go from Speech to Text? (Neural)</Heading>

  <Grid
    flex={1}
    gridTemplateColumns="100%"
    gridTemplateRows="80% 20%"
    height="70%"
    >
    <Box style={{display: 'flex', justifyContent: 'center'}}>
    <Image width="85%" height="100%" src={asrneural} alt="ASR Process" />
    </Box>
    <Box>
    <Text fontSize="14px" textAlign="center" style={{padding: 0}}>Image from <Link target="_blank" fontSize="14px" href="https://web.stanford.edu/class/archive/cs/cs224n/cs224n.1174/lectures/cs224n-2017-lecture12.pdf">CS224N Lecture</Link></Text>       
    {/* <Text fontSize="28px" textAlign="center">Until recently, speech was primarily acoustic features in the from of MFCCs. </Text>        */}
    </Box>
  </Grid>

  <Notes>
        <UnorderedList>
          <ListItem fontSize="16px"> Neural ASR methods started with separate neural models for each step of the process.</ListItem>
          <ListItem fontSize="16px"> Speech processing was replaced with convlutional methods to extract features from raw audio</ListItem>
          <ListItem fontSize="16px"> These methods consisted of gluing several neural models together, so there was shift to end-to-end models</ListItem>
          <ListItem fontSize="16px">  </ListItem>
      </UnorderedList>
  </Notes>

</Slide>


{/* <Slide>

  <Heading fontSize="h3"> How do we go from Speech to Text? (CTC)</Heading>

  <Grid
    flex={1}
    gridTemplateColumns="100%"
    gridTemplateRows="100%"
    height="70%"
    >
    <Box style={{display: 'flex', justifyContent: 'center', alignItems: 'center'}}>
    <Image width="85%" height="100%" src={ctc1} alt="ASR Process" />
    </Box>
    <Box>
    <Text fontSize="14px" textAlign="center" style={{padding: 0}}>Image from <Link target="_blank" fontSize="14px" href="https://distill.pub/2017/ctc/">Distill</Link></Text>       
    <Text fontSize="28px" textAlign="center">Until recently, speech was primarily acoustic features in the from of MFCCs. </Text>       
    </Box>
  </Grid>

</Slide> */}

<Slide>

  <Heading fontSize="h3"> How do we go from Speech to Text? (CTC)</Heading>

  <Grid
    flex={1}
    gridTemplateColumns="100%"
    gridTemplateRows="100%"
    height="70%"
    >
    <Box style={{display: 'flex', justifyContent: 'center', alignItems: 'center'}}>
    <Image width="85%" height="100%" src={ctc2} alt="ASR Process" />
    </Box>
    <Box>
    <Text fontSize="14px" textAlign="center" style={{padding: 0}}>Image from <Link target="_blank" fontSize="14px" href="https://towardsdatascience.com/audio-deep-learning-made-simple-automatic-speech-recognition-asr-how-it-works-716cfce4c706">source</Link></Text>       
    {/* <Text fontSize="28px" textAlign="center">Until recently, speech was primarily acoustic features in the from of MFCCs. </Text>        */}
    </Box>
  </Grid>
  <Notes>
        <UnorderedList>
          <ListItem fontSize="16px"> Connectionist Temporal Classification is an algorithm applied in speech recognition to allow for predictions of individual characters to be mapped to words. </ListItem>
          <ListItem fontSize="16px"> An epsilon or 'blank' character is added to the possible set of predictions and the placement of epislon in the initial output allows the reduction of multi-character output to words in a language. </ListItem>
          <ListItem fontSize="16px"> This allows neural models like RNNS and LSTMs to be employed directly to speech (sometimes MFCCs, sometimes raw audio)</ListItem>
          <ListItem fontSize="16px"> An example employing this to an RNN style architecture is DeepSpeech. </ListItem>
          <ListItem fontSize="16px">  </ListItem>
      </UnorderedList>
  </Notes>
</Slide>



<Slide>

  <Heading fontSize="h3"> How do we go from Speech to Text? (wav2vec 2.0)</Heading>

  <Grid
    flex={1}
    gridTemplateColumns="100%"
    gridTemplateRows="80% 20%"
    height="70%"
    >
    <Box style={{display: 'flex', justifyContent: 'center'}}>
    <Image width="85%" height="100%" src={w2v2} alt="ASR Process" />
    </Box>
    <Box>
    <Text fontSize="14px" textAlign="center" style={{padding: 0}}>Image from <Link target="_blank" fontSize="14px" href="https://ai.facebook.com/blog/wav2vec-20-learning-the-structure-of-speech-from-raw-audio/">FAIR Blog post</Link></Text>       
    {/* <Text fontSize="28px" textAlign="center">Until recently, speech was primarily acoustic features in the from of MFCCs. </Text>        */}
    </Box>
  </Grid>

  <Notes>
        <UnorderedList>
         <ListItem fontSize="16px"> Self-supervised speech reprsentations -> BERT for audio data </ListItem>
          <ListItem fontSize="16px"> "a multi-layer convolutional feature encoder f : X → Z which takes as input raw audio X and outputs latent speech representations z1, . . . , zT for T time-steps." </ListItem>
          <ListItem fontSize="16px"> "They are then fed to a Transformer g : Z 7→ C to build representations c1, . . . , cT capturing information from the entire sequence" </ListItem>
          <ListItem fontSize="16px"> "The output of the feature encoder is discretized to qt with a quantization module Z → Q to represent the targets (Figure 1) in the self-supervised objective"</ListItem>
          <ListItem fontSize="16px"> You can add a CTC head to the resulting representations and a langauge model on top of that </ListItem>
          <ListItem fontSize="16px"> VERY expensive to train - everyone fine-tunes it or uses the released speech representations. </ListItem>
      </UnorderedList>
  </Notes>

</Slide>


{/* 
<Slide>
        <Heading fontSize="h3"> Zero resource code switching</Heading>

        <UnorderedList>
        <Appear>
          <ListItem>Fine-tuned wav2vec 2.0 XLSR</ListItem>
        </Appear>
        <Appear>
          <ListItem>Resulting models are fine-tuned primarily on monolingual data (Yorùbá and Nigerian English)</ListItem>
        </Appear>
        <Appear>
          <ListItem>
            A small, code-switched dataset was created for experiments. Instances are from Yorùbá Nollywood film and Yorùbá News Broadcasts.
          </ListItem>
        </Appear>
        <Appear>
          <ListItem>
           No lexicon or language model.
          </ListItem>
        </Appear>
      </UnorderedList>

    </Slide> */}


  <Slide>

  <Heading fontSize="h3"> Zero resource code switching</Heading>

  <Grid
    flex={1}
    gridTemplateColumns="100%"
    gridTemplateRows="20% 80%"
    height="70%"
    >
    <Box>
    {/* <Text fontSize="14px" textAlign="center" style={{padding: 0}}>Image from <Link target="_blank" fontSize="14px" href="https://jonathan-hui.medium.com/speech-recognition-asr-model-training-90ed50d93615">source</Link></Text>        */}
    <Text fontSize="28px" textAlign="center"> Fine-tuned wav2vec 2.0 XLSR </Text>       
    </Box>
    <Box style={{display: 'flex', justifyContent: 'center'}}>
    <Image width="60%" height="100%" src={myw2v} alt="Mfcc creation" />
    </Box>
  </Grid>

  <Notes>
  </Notes>

</Slide>


<Slide>

  <Heading fontSize="h3"> Zero resource code switching</Heading>

  <Grid
    flex={1}
    gridTemplateColumns="100%"
    gridTemplateRows="20% 80%"
    height="70%"
    >
    <Box>
    {/* <Text fontSize="14px" textAlign="center" style={{padding: 0}}>Image from <Link target="_blank" fontSize="14px" href="https://jonathan-hui.medium.com/speech-recognition-asr-model-training-90ed50d93615">source</Link></Text>        */}
    <Text fontSize="28px" textAlign="center"> Resulting models are fine-tuned primarily on monolingual data (Yorùbá and Nigerian English) </Text>       
    </Box>
    <Box style={{display: 'flex', justifyContent: 'center'}}>
    <Image width="60%" height="100%" src={study1} alt="Mfcc creation" />
    </Box>
  </Grid>

  <Notes>
  </Notes>

</Slide>


<Slide>

  <Heading fontSize="h3" margin="0px"> Zero resource code switching</Heading>

  <Grid
    flex={1}
    gridTemplateColumns="100%"
    gridTemplateRows="20% 80%"
    height="80%"
    >
    <Box>
    {/* <Text fontSize="14px" textAlign="center" style={{padding: 0}}>Image from <Link target="_blank" fontSize="14px" href="https://jonathan-hui.medium.com/speech-recognition-asr-model-training-90ed50d93615">source</Link></Text>        */}
    <Text fontSize="28px" textAlign="center"> A small, code-switched dataset was created for experiments. Instances are from Yorùbá Nollywood film and Yorùbá News Broadcasts. </Text>       
    </Box>
    <Box style={{display: 'flex', justifyContent: 'center'}}>
    <iframe width="60%" height="100%" src="https://www.youtube.com/embed/nDIvaW7rPXo?start=76" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
    </Box>
  </Grid>

  <Notes>
  </Notes>

</Slide>

    {/* <Slide>
        <Heading fontSize="h3" > Zero resource code switching</Heading>

        <UnorderedList>
        <Appear>
          <ListItem>Fine-tuned wav2vec 2.0 XLSR</ListItem>
        </Appear>
        <Appear>
          <ListItem>Resulting models are fine-tuned primarily on monolingual data (Yorùbá and Nigerian English)</ListItem>
        </Appear>
        <Appear>
          <ListItem>
            A small, code-switched dataset was created for experiments. Instances are from Yorùbá Nollywood film and Yorùbá News Broadcasts.
          </ListItem>
        </Appear>
        <Appear>
          <ListItem>
           No lexicon or language model.
          </ListItem>
        </Appear>
      </UnorderedList>

    </Slide> */}





  <Slide>
      <Heading fontSize="h3"> Zero resource code switching</Heading>

      <UnorderedList>
      <Appear>
          <ListItem>
           No lexicon or language model.
          </ListItem>
       </Appear>
      <Appear>
        <ListItem>Evidence of latent phonetic representation in the model</ListItem>
        <ul>
            <ListItem>
            mock -> mọrk
            </ListItem>

            <ListItem>
            america -> a mẹrika
            </ListItem>

            <ListItem>
            she’s a  -> ṣhe  is  a
            </ListItem>
          </ul>
      </Appear>
      {/* <Appear>
        <ListItem>Ran linear and neural probes on the data and found code-switching detection</ListItem>
      </Appear> */}
    </UnorderedList>

  </Slide>

  <Slide>

  <Heading fontSize="h3"> Zero resource code switching</Heading>

  <Grid
    flex={1}
    gridTemplateColumns="100%"
    gridTemplateRows="20% 80%"
    height="70%"
    >
    <Box>
    {/* <Text fontSize="14px" textAlign="center" style={{padding: 0}}>Image from <Link target="_blank" fontSize="14px" href="https://jonathan-hui.medium.com/speech-recognition-asr-model-training-90ed50d93615">source</Link></Text>        */}
    <Text fontSize="28px" textAlign="center"> Ran linear and neural probes on the data and found code-switching detection </Text>       
    </Box>
    <Box style={{display: 'flex', justifyContent: 'center'}}>
    <Image width="85%" height="100%" src={study2c} alt="Mfcc creation" />
    </Box>
  </Grid>

  <Notes>
  </Notes>

</Slide>



  <Slide>
      <Heading fontSize="h3"> Get Started with ASR</Heading>

      <UnorderedList>
      <Appear>
        <ListItem>Nigerian Speech Datasets</ListItem>
        <ul>
            <ListItem>
            <Link target="_blank" href="https://repo.sadilar.org/handle/20.500.12185/431">Lagos NWU Yoruba Speech Corpus  </Link>        
              </ListItem>

            <ListItem>
            <Link target="_blank" href="https://www.openslr.org/86/">Crowdsourced high-quality Yoruba speech data set </Link>   
            </ListItem>

            <ListItem>
            <Link target="_blank" href="https://www.openslr.org/70">Nigerian English Dataset </Link>          
            </ListItem>

            <ListItem>
            <Link target="_blank" href="https://catalog.ldc.upenn.edu/LDC2019S16">Igbo Dataset (Paid)</Link>
            </ListItem>

            <ListItem>
            <Link target="_blank" href="https://github.com/danielajisafe/ASR-Nigeria-Pidgin">Pidgin Dataset</Link>
            </ListItem>

          </ul>
      </Appear>

      </UnorderedList>

      </Slide>



      <Slide>
      <Heading fontSize="h3" margin="0px"> Get Started with ASR</Heading>

      <UnorderedList>

      <Appear>
        <ListItem>HMM/GMM - Kaldi</ListItem>
        <ul>
            <ListItem>
              Once installed, experiments can be done offline        
            </ListItem>

            <ListItem>
              You will need a pronunciation lexicon to get started. Check out <Link target="_blank" href="https://github.com/dmort27/epitran">Epitran</Link> to make your own!
            </ListItem>

            <ListItem>
            Check out their <Link target="_blank" href="https://kaldi-asr.org/doc/">documentation </Link>
            </ListItem>

            <ListItem>
            Best starting point is a <Link target="_blank" href="https://eleanorchodroff.com/tutorial/kaldi/index.html"> tutorial by Eleanor Chodroff </Link>
            </ListItem>

          </ul>
      </Appear>
      <Appear>
      <ListItem>wav2vec2.0 </ListItem>
        <ul>
        <ListItem><Link target="_blank" href="https://proceedings.neurips.cc/paper/2020/file/92d1e1eb1cd6f9fba3227870bb6d7f07-Paper.pdf">wav2vec2.0 Paper</Link></ListItem>
        <ListItem><Link target="_blank" href="https://huggingface.co/blog/fine-tune-xlsr-wav2vec2">wav2vec2.0 Hugging face Tutorial</Link></ListItem>
        </ul>
      </Appear>
    </UnorderedList>

  </Slide>




  <Slide>
      <FlexBox height="100%" flexDirection="column">
        <Heading margin="0px" fontSize="150px">
          <i>Thank you!</i>
        </Heading>
        <Heading margin="0px" fontSize="h6">
          <Link target="_blank" href="https://bit.ly/adesfa-asr"> Check out links on everything covered here. </Link>.
        </Heading>
      </FlexBox>
      <Notes>
        Spectacle supports notes per slide again.
        <ol>
          <li>Notes can now be HTML markup!</li>
          <li>Lists can make it easier to make points.</li>
          <li>Let's see of this works....</li>
        </ol>
      </Notes>
    </Slide>













    {/* <Slide>
      <FlexBox height="100%" flexDirection="column">
        <Heading margin="0px" fontSize="150px">
          ✨<i>Spectacle</i> ✨
        </Heading>
        <Heading margin="0px" fontSize="h2">
          A ReactJS Presentation Library
        </Heading>
        <Heading margin="0px 32px" color="primary" fontSize="h3">
          Where you can write your decks in JSX, Markdown, or MDX!
        </Heading>
      </FlexBox>
      <Notes>
        Spectacle supports notes per slide again.
        <ol>
          <li>Notes can now be HTML markup!</li>
          <li>Lists can make it easier to make points.</li>
          <li>Let's see of this works....</li>
        </ol>
      </Notes>
    </Slide> */}


    {/* <Slide
      transition={{
        from: {
          transform: 'scale(0.5) rotate(45deg)',
          opacity: 0
        },
        enter: {
          transform: 'scale(1) rotate(0)',
          opacity: 1
        },
        leave: {
          transform: 'scale(0.2) rotate(315deg)',
          opacity: 0
        }
      }}
      backgroundColor="tertiary"
      backgroundImage="url(https://github.com/FormidableLabs/dogs/blob/main/src/beau.jpg?raw=true)"
      backgroundOpacity={0.5}
    >
      <Heading>Custom Backgrounds</Heading>
      <UnorderedList>
        <ListItem>
          <CodeSpan>backgroundColor</CodeSpan>
        </ListItem>
        <ListItem>
          <CodeSpan>backgroundImage</CodeSpan>
        </ListItem>
        <ListItem>
          <CodeSpan>backgroundOpacity</CodeSpan>
        </ListItem>
        <ListItem>
          <CodeSpan>backgroundSize</CodeSpan>
        </ListItem>
        <ListItem>
          <CodeSpan>backgroundPosition</CodeSpan>
        </ListItem>
        <ListItem>
          <CodeSpan>backgroundRepeat</CodeSpan>
        </ListItem>
      </UnorderedList>
    </Slide>
    <Slide>
      <Heading>Animated Elements</Heading>
      <OrderedList>
        <Appear>
          <ListItem>Elements can animate in!</ListItem>
        </Appear>
        <Appear>
          <ListItem>Out of order</ListItem>
        </Appear>
        <Appear priority={0}>
          <ListItem>
            Just identify the order with the prop <CodeSpan>priority</CodeSpan>!
          </ListItem>
        </Appear>
      </OrderedList>
    </Slide> */}


    {/* <Slide>
      <FlexBox>
        <Text>These</Text>
        <Text>Text</Text>
        <Text color="secondary">Items</Text>
        <Text fontWeight="bold">Flex</Text>
      </FlexBox>
      <Grid gridTemplateColumns="1fr 2fr" gridColumnGap={15}>
        <Box backgroundColor="primary">
          <Text color="secondary">Single-size Grid Item</Text>
        </Box>
        <Box backgroundColor="secondary">
          <Text>Double-size Grid Item</Text>
        </Box>
      </Grid>
      <Grid
        gridTemplateColumns="1fr 1fr 1fr"
        gridTemplateRows="1fr 1fr 1fr"
        alignItems="center"
        justifyContent="center"
        gridRowGap={1}
      >
        {Array(9)
          .fill('')
          .map((_, index) => (
            <FlexBox paddingTop={0} key={`formidable-logo-${index}`} flex={1}>
              <Image src={formidableLogo} width={100} />
            </FlexBox>
          ))}
      </Grid>
    </Slide> */}




    {/* <SlideFragments /> */}


    {/* <Slide>
      <CodePane language="jsx">{`
        import { createClient, Provider } from 'urql';

        const client = createClient({ url: 'https://0ufyz.sse.codesandbox.io' });

        const App = () => (
          <Provider value={client}>
            <Todos />
          </Provider>
        );
        `}</CodePane>
      <Box height={20} />
      <CodePane language="java" showLineNumbers={false}>{`
        public class NoLineNumbers {
          public static void main(String[] args) {
            System.out.println("Hello");
          }
        }
        `}</CodePane>
    </Slide> */}


    {/* <div>
      <Slide>
        <Heading>This is a slide embedded in a div</Heading>
      </Slide>
    </div> */}



    {/* <MarkdownSlide componentProps={{ color: 'yellow' }}>
      {`
        # This is a Markdown Slide

        - You can pass props down to all elements on the slide.
        - Just use the \`componentProps\` prop.
        `}
    </MarkdownSlide> */}


    {/* <MarkdownSlide animateListItems>
      {`
       # This is also a Markdown Slide

       It uses the \`animateListItems\` prop.

       - Its list items...
       - ...will appear...
       - ...one at a time.
      `}
    </MarkdownSlide> */}


    {/* <Slide>
      <Grid
        flex={1}
        gridTemplateColumns="50% 50%"
        gridTemplateRows="50% 50%"
        height="100%"
      >
        <FlexBox alignItems="center" justifyContent="center">
          <Heading>This is a 4x4 Grid</Heading>
        </FlexBox>
        <FlexBox alignItems="center" justifyContent="center">
          <Text textAlign="center">
            With all the content aligned and justified center.
          </Text>
        </FlexBox>
        <FlexBox alignItems="center" justifyContent="center">
          <Text textAlign="center">
            It uses Spectacle <CodeSpan>{'<Grid />'}</CodeSpan> and{' '}
            <CodeSpan>{'<FlexBox />'}</CodeSpan> components.
          </Text>
        </FlexBox>
        <FlexBox alignItems="center" justifyContent="center">
          <Box width={200} height={200} backgroundColor="secondary" />
        </FlexBox>
      </Grid>
    </Slide> */}



    {/* <MarkdownSlideSet>
      {`
        # This is the first slide of a Markdown Slide Set
        ---
        # This is the second slide of a Markdown Slide Set
        `}
    </MarkdownSlideSet> */}

  </Deck>
);

ReactDOM.render(<Presentation />, document.getElementById('root'));