saattrupdan committed on
Commit
7089c67
·
1 Parent(s): ae07559

chore: Update deps

Browse files
Files changed (2) hide show
  1. app.py +5 -5
  2. requirements.txt +21 -14
app.py CHANGED
@@ -1,7 +1,6 @@
1
  """Røst speech-to-text demo."""
2
 
3
  import logging
4
- import os
5
  import warnings
6
 
7
  import gradio as gr
@@ -11,6 +10,7 @@ import torch
11
  from punctfix import PunctFixer
12
  from transformers import pipeline
13
  from dotenv import load_dotenv
 
14
 
15
  logging.basicConfig(
16
  level=logging.INFO,
@@ -69,6 +69,8 @@ transcriber = pipeline(
69
  logger.info("Loading the punctuation fixer model...")
70
  transcription_fixer = PunctFixer(language="da", device=device)
71
 
 
 
72
  logger.info("Models loaded, ready to transcribe audio.")
73
 
74
  def transcribe_audio(sampling_rate_and_audio: tuple[int, np.ndarray] | None) -> str:
@@ -92,6 +94,7 @@ def transcribe_audio(sampling_rate_and_audio: tuple[int, np.ndarray] | None) ->
92
  if audio.ndim > 1:
93
  audio = np.mean(audio, axis=1)
94
  audio = samplerate.resample(audio, 16_000 / sampling_rate, "sinc_best")
 
95
 
96
  logger.info(f"Transcribing audio clip of {len(audio) / 16_000:.2f} seconds...")
97
  transcription = transcriber(
@@ -111,13 +114,11 @@ def transcribe_audio(sampling_rate_and_audio: tuple[int, np.ndarray] | None) ->
111
  demo = gr.Interface(
112
  fn=transcribe_audio,
113
  inputs=gr.Audio(
114
- sources=["microphone", "upload"], show_label=False, min_length=1, max_length=60
115
  ),
116
  outputs="textbox",
117
  title=TITLE,
118
  description=DESCRIPTION,
119
- css="p { font-size: 1.0rem; }",
120
- allow_flagging="never",
121
  examples=[
122
  "https://filedn.com/lRBwPhPxgV74tO0rDoe8SpH/audio-examples/bornholmsk.wav",
123
  "https://filedn.com/lRBwPhPxgV74tO0rDoe8SpH/audio-examples/soenderjysk.wav",
@@ -125,7 +126,6 @@ demo = gr.Interface(
125
  "https://filedn.com/lRBwPhPxgV74tO0rDoe8SpH/audio-examples/accent.wav",
126
  ],
127
  cache_examples=False,
128
- theme=gr.themes.Soft(primary_hue="orange"),
129
  )
130
 
131
  demo.launch()
 
1
  """Røst speech-to-text demo."""
2
 
3
  import logging
 
4
  import warnings
5
 
6
  import gradio as gr
 
10
  from punctfix import PunctFixer
11
  from transformers import pipeline
12
  from dotenv import load_dotenv
13
+ import torch_audiomentations as ta
14
 
15
  logging.basicConfig(
16
  level=logging.INFO,
 
69
  logger.info("Loading the punctuation fixer model...")
70
  transcription_fixer = PunctFixer(language="da", device=device)
71
 
72
+ normaliser = ta.PeakNormalization(p=1.0)
73
+
74
  logger.info("Models loaded, ready to transcribe audio.")
75
 
76
  def transcribe_audio(sampling_rate_and_audio: tuple[int, np.ndarray] | None) -> str:
 
94
  if audio.ndim > 1:
95
  audio = np.mean(audio, axis=1)
96
  audio = samplerate.resample(audio, 16_000 / sampling_rate, "sinc_best")
97
+ audio = normaliser(torch.tensor(audio).unsqueeze(0).unsqueeze(0)).squeeze().numpy()
98
 
99
  logger.info(f"Transcribing audio clip of {len(audio) / 16_000:.2f} seconds...")
100
  transcription = transcriber(
 
114
  demo = gr.Interface(
115
  fn=transcribe_audio,
116
  inputs=gr.Audio(
117
+ sources=["microphone", "upload"], show_label=False
118
  ),
119
  outputs="textbox",
120
  title=TITLE,
121
  description=DESCRIPTION,
 
 
122
  examples=[
123
  "https://filedn.com/lRBwPhPxgV74tO0rDoe8SpH/audio-examples/bornholmsk.wav",
124
  "https://filedn.com/lRBwPhPxgV74tO0rDoe8SpH/audio-examples/soenderjysk.wav",
 
126
  "https://filedn.com/lRBwPhPxgV74tO0rDoe8SpH/audio-examples/accent.wav",
127
  ],
128
  cache_examples=False,
 
129
  )
130
 
131
  demo.launch()
requirements.txt CHANGED
@@ -1,29 +1,33 @@
1
  aiofiles==23.2.1
 
2
  annotated-types==0.7.0
3
  anyio==4.4.0
4
  attrs==24.2.0
 
5
  certifi==2024.8.30
6
  charset-normalizer==3.3.2
7
  click==8.1.7
8
  contourpy==1.3.0
9
  cycler==0.12.1
10
  exceptiongroup==1.2.2
11
- fastapi==0.115.0
12
  ffmpy==0.4.0
13
  filelock==3.16.1
14
  fonttools==4.53.1
15
  fsspec==2024.9.0
16
- gradio==4.44.0
17
- gradio_client==1.3.0
 
18
  h11==0.14.0
 
19
  httpcore==1.0.5
20
  httpx==0.27.2
21
- huggingface-hub==0.25.0
22
  hypothesis==6.112.1
23
  idna==3.10
24
  importlib_resources==6.4.5
25
  Jinja2==3.1.4
26
- kenlm @ https://github.com/kpu/kenlm/archive/master.zip
27
  kiwisolver==1.4.7
28
  markdown-it-py==3.0.0
29
  MarkupSafe==2.1.5
@@ -38,21 +42,22 @@ pandas==2.2.2
38
  pillow==10.4.0
39
  punctfix==0.11.1
40
  pyctcdecode==0.5.0
41
- pydantic==2.9.2
42
- pydantic_core==2.23.4
43
  pydub==0.25.1
44
  Pygments==2.18.0
45
  pygtrie==2.5.0
46
  pyparsing==3.1.4
47
  python-dateutil==2.9.0.post0
48
  python-dotenv==1.0.1
49
- python-multipart==0.0.9
50
  pytz==2024.2
51
  PyYAML==6.0.2
52
  regex==2024.9.11
53
  requests==2.32.3
54
  rich==13.8.1
55
  ruff==0.6.5
 
56
  safetensors==0.4.5
57
  samplerate==0.2.1
58
  semantic-version==2.10.0
@@ -60,15 +65,17 @@ shellingham==1.5.4
60
  six==1.16.0
61
  sniffio==1.3.1
62
  sortedcontainers==2.4.0
63
- starlette==0.38.5
64
- sympy==1.13.2
65
- tokenizers==0.19.1
66
  tomlkit==0.12.0
67
- torch==2.4.1
68
  tqdm==4.66.5
69
- transformers==4.44.2
70
  typer==0.12.5
71
- typing_extensions==4.12.2
 
 
72
  tzdata==2024.1
73
  urllib3==2.2.3
74
  uvicorn==0.30.6
 
1
  aiofiles==23.2.1
2
+ annotated-doc==0.0.4
3
  annotated-types==0.7.0
4
  anyio==4.4.0
5
  attrs==24.2.0
6
+ brotli==1.2.0
7
  certifi==2024.8.30
8
  charset-normalizer==3.3.2
9
  click==8.1.7
10
  contourpy==1.3.0
11
  cycler==0.12.1
12
  exceptiongroup==1.2.2
13
+ fastapi==0.122.0
14
  ffmpy==0.4.0
15
  filelock==3.16.1
16
  fonttools==4.53.1
17
  fsspec==2024.9.0
18
+ gradio==6.0.1
19
+ gradio_client==2.0.0
20
+ groovy==0.1.2
21
  h11==0.14.0
22
+ hf-xet==1.2.0
23
  httpcore==1.0.5
24
  httpx==0.27.2
25
+ huggingface-hub==0.36.0
26
  hypothesis==6.112.1
27
  idna==3.10
28
  importlib_resources==6.4.5
29
  Jinja2==3.1.4
30
+ kenlm @ https://github.com/kpu/kenlm/archive/master.zip#sha256=d23d300d559a45a5e3ede958dbbf2395231119c0b8cd97a1ea43480625894ff4
31
  kiwisolver==1.4.7
32
  markdown-it-py==3.0.0
33
  MarkupSafe==2.1.5
 
42
  pillow==10.4.0
43
  punctfix==0.11.1
44
  pyctcdecode==0.5.0
45
+ pydantic==2.12.4
46
+ pydantic_core==2.41.5
47
  pydub==0.25.1
48
  Pygments==2.18.0
49
  pygtrie==2.5.0
50
  pyparsing==3.1.4
51
  python-dateutil==2.9.0.post0
52
  python-dotenv==1.0.1
53
+ python-multipart==0.0.20
54
  pytz==2024.2
55
  PyYAML==6.0.2
56
  regex==2024.9.11
57
  requests==2.32.3
58
  rich==13.8.1
59
  ruff==0.6.5
60
+ safehttpx==0.1.7
61
  safetensors==0.4.5
62
  samplerate==0.2.1
63
  semantic-version==2.10.0
 
65
  six==1.16.0
66
  sniffio==1.3.1
67
  sortedcontainers==2.4.0
68
+ starlette==0.50.0
69
+ sympy==1.14.0
70
+ tokenizers==0.22.1
71
  tomlkit==0.12.0
72
+ torch==2.9.1
73
  tqdm==4.66.5
74
+ transformers==4.57.3
75
  typer==0.12.5
76
+ typer-slim==0.20.0
77
+ typing-inspection==0.4.2
78
+ typing_extensions==4.15.0
79
  tzdata==2024.1
80
  urllib3==2.2.3
81
  uvicorn==0.30.6