Skip to content

Commit 269152b

Browse files
committed
Spectrogram File Format (SFF)
1 parent ea44526 commit 269152b

12 files changed

Lines changed: 480 additions & 3 deletions

File tree

README.md

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,44 @@ Viridis | Jet | GrayReversed | GreensReversed
125125

126126
See all colormaps in [dev/colormap/](dev/colormap/)
127127

128+
## Spectrogram File Format (SFF)
129+
130+
The Spectrogram library has methods which can write spectrogram data to SFF files and read it back. These files contain 2D spectrogram data (repeated FFTs) stored as double-precision floating-point values, preceded by a small header describing the audio and FFT settings from which time and frequency scale information can be derived.
131+
132+
SFF files can store `Complex` values (with real and imaginary values for each point) to faithfully represent the FFT output, or `double` values to represent magnitude (with an optional pre-conversion to Decibels to represent power).
133+
134+
```cs
135+
double[] audio = Read.WavInt16mono("hal.wav");
136+
int sampleRate = 44100;
137+
int fftSize = 1 << 12;
138+
var spec = new Spectrogram(sampleRate, fftSize, stepSize: 700, maxFreq: 2000);
139+
spec.Add(audio);
140+
spec.SaveData("hal.sff");
141+
```
142+
143+
This file can now be read in any language. A Python module to read SFF files has been created (in [dev/sff/sffLib.py](dev/sff/sffLib.py)) which allows Spectrograms created by this library and stored in SFF format to be loaded as 2D numpy arrays in Python.
144+
145+
```python
146+
import matplotlib.pyplot as plt
import numpy as np
import sffLib

# load spectrogram data as a 2D numpy array
# (hal.sff is the file written by the C# example above)
sf = sffLib.SpectrogramFile("hal.sff")

# plot the spectrogram as a heatmap
# (axis 0 of sf.values is time steps, axis 1 is frequency bins)
freqs = np.arange(sf.values.shape[1]) * sf.hzPerPx / 1000
times = np.arange(sf.values.shape[0]) * sf.secPerPx
plt.pcolormesh(freqs, times, sf.values)

# decorate the plot
plt.colorbar()
plt.title("Spectrogram Magnitude (RMS)")
plt.ylabel("Time (seconds)")
plt.xlabel("Frequency (kHz)")
plt.show()
163+
```
164+
165+
![](dev/sff/hal.png)
128166

129167
## Resources
130168
* [FftSharp](https://github.com/swharden/FftSharp) - the module which actually performs the FFT and related transformations

dev/sff/complex.sff

1.55 MB
Binary file not shown.

dev/sff/hal.png

90.7 KB
Loading

dev/sff/hal.sff

336 KB
Binary file not shown.

dev/sff/readme.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# The Spectrogram File Format (SFF)
2+
3+
## Resources
4+
* [Designing File Formats](https://fadden.com/tech/file-formats.html)
5+
* [A brief look at file format design](http://decoy.iki.fi/texts/filefd/filefd)
6+
* [Standard Flowgram Format (SFF)](https://www.ncbi.nlm.nih.gov/Traces/trace.cgi?cmd=show&f=formats&m=doc&s=format#sff)

dev/sff/sffDemoComplex.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
import os
2+
import matplotlib.pyplot as plt
3+
import sffLib
4+
5+
if __name__ == "__main__":
    # Resolve the data file next to this script. abspath() is required because
    # __file__ can be a bare relative name (e.g. "sffDemoComplex.py"), in which
    # case dirname() is "" and naive concatenation would yield "/complex.sff".
    scriptFolder = os.path.dirname(os.path.abspath(__file__))
    complexValues = sffLib.SpectrogramFile(
        os.path.join(scriptFolder, "complex.sff")).values

    # values is a 2D numpy array of Complex values
    print("DATA TYPE:", type(complexValues))
    print("DATA SHAPE:", complexValues.shape)

    # you can work with individual Complex data values
    # using X/Y coordinates (X is time, Y is frequency)
    print("EXAMPLE VALUE:", complexValues[3, 5])

dev/sff/sffDemoSimple.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
import os
2+
import matplotlib.pyplot as plt
3+
import numpy as np
4+
import sffLib
5+
6+
if __name__ == "__main__":

    # Resolve files next to this script. abspath() is required because
    # __file__ can be a bare relative name (e.g. "sffDemoSimple.py"), in which
    # case dirname() is "" and naive concatenation would yield "/hal.sff".
    scriptFolder = os.path.dirname(os.path.abspath(__file__))

    # hal.sff is a file of stored FFT magnitude (not complex data)
    sf = sffLib.SpectrogramFile(os.path.join(scriptFolder, "hal.sff"))

    # plot the spectrogram as a heatmap
    # (axis 0 of sf.values is time steps, axis 1 is frequency bins)
    freqs = np.arange(sf.values.shape[1]) * sf.hzPerPx / 1000
    times = np.arange(sf.values.shape[0]) * sf.secPerPx
    plt.pcolormesh(freqs, times, sf.values)

    # decorate the plot
    plt.colorbar()
    plt.title("Spectrogram Magnitude (RMS)")
    plt.ylabel("Time (seconds)")
    plt.xlabel("Frequency (kHz)")
    plt.savefig(os.path.join(scriptFolder, "hal.png"))
    plt.show()

dev/sff/sffLib.py

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
import os
2+
import numpy as np
3+
import struct
4+
import datetime
5+
import time
6+
import math
7+
8+
9+
class SpectrogramFile:
    """Spectrogram loaded from a Spectrogram File Format (SFF) file.

    The constructor reads the whole file, parses the little-endian header,
    prints a summary of every header field, and exposes the data as
    ``values``: a 2D numpy array of shape ``(stepCount, fftHeight)`` whose
    dtype is ``complex128`` when the file stores two values per point and
    ``float64`` otherwise.

    Attributes set from the header: ``versionMajor``, ``versionMinor``,
    ``sampleRate``, ``stepSize``, ``stepCount``, ``fftSize``,
    ``fftFirstIndex``, ``fftHeight``, ``offsetHz``, ``valuesPerPoint``,
    ``isComplex``, ``bytesPerValue``, ``decibels``, ``recordingStart`` and
    ``firstDataByte``. Derived attributes: ``secPerPx`` (stepSize /
    sampleRate) and ``hzPerPx`` (sampleRate / fftSize).

    Raises:
        ValueError: if the magic number is wrong, the header or data section
            is truncated, or the data encoding is not 8-byte doubles.
    """

    # bytes 0-3 of a valid file (0xD3 'S' 'F' 'F') read as little-endian int32
    _MAGIC_NUMBER = 1179014099

    # the last header field parsed here (firstDataByte) ends at byte 84
    _HEADER_BYTES_REQUIRED = 84

    def __init__(self, filePath):

        timeStart = time.perf_counter()

        print(f"Spectrogram from file: {os.path.basename(filePath)}")
        self.filePath = os.path.abspath(filePath)

        with open(filePath, 'rb') as f:
            filebytes = f.read()

        # validate file format
        if len(filebytes) < 4:
            raise ValueError("invalid file format")
        magicNumber = struct.unpack_from("<l", filebytes, 0)[0]
        if magicNumber != self._MAGIC_NUMBER:
            raise ValueError("invalid file format")
        print(f"Validated file format (magic number: {magicNumber:,})")
        if len(filebytes) < self._HEADER_BYTES_REQUIRED:
            raise ValueError("invalid file format (truncated header)")

        # read version
        self.versionMajor = int(filebytes[40])
        self.versionMinor = int(filebytes[41])
        print(f"SFF version: {self.versionMajor}.{self.versionMinor}")

        # read time information
        (self.sampleRate,
         self.stepSize,
         self.stepCount) = struct.unpack_from("<lll", filebytes, 42)
        print(f"Sample rate: {self.sampleRate} Hz")
        print(f"Step size: {self.stepSize} samples")
        print(f"Step count: {self.stepCount} steps")

        # read frequency information
        (self.fftSize,
         self.fftFirstIndex,
         self.fftHeight,
         self.offsetHz) = struct.unpack_from("<llll", filebytes, 54)
        print(f"FFT size: {self.fftSize}")
        print(f"FFT first index: {self.fftFirstIndex}")
        print(f"FFT height: {self.fftHeight}")
        print(f"FFT offset: {self.offsetHz} Hz")

        # data format
        self.valuesPerPoint = int(filebytes[70])
        self.isComplex = self.valuesPerPoint == 2
        self.bytesPerValue = int(filebytes[71])
        self.decibels = int(filebytes[72]) == 1
        print(f"Values per point: {self.valuesPerPoint}")
        print(f"Complex values: {self.isComplex}")
        print(f"Bytes per value: {self.bytesPerValue}")
        print(f"Decibels: {self.decibels}")

        # useful class properties
        self.secPerPx = self.stepSize / self.sampleRate
        self.hzPerPx = self.sampleRate / self.fftSize
        print(f"Time Resolution: {self.secPerPx} sec/px")
        print(f"Frequency Resolution: {self.hzPerPx} Hz/px")

        # recording start time (bytes 74-79: 2-digit year, month, day,
        # hour, minute, second); kept as a naive datetime
        self.recordingStart = datetime.datetime(
            int(filebytes[74]) + 2000, int(filebytes[75]), int(filebytes[76]),
            int(filebytes[77]), int(filebytes[78]), int(filebytes[79]))
        print(f"Recording start (UTC): {self.recordingStart}")

        # data storage
        self.firstDataByte = struct.unpack_from("<l", filebytes, 80)[0]
        print(f"First data byte: {self.firstDataByte}")

        # read data values
        self.values = self._readValues(filebytes)

        print(f"Loaded {os.path.basename(self.filePath)} " +
              f"({self.valuesPerPoint * self.fftHeight * self.stepCount:,} values) " +
              f"in {(time.perf_counter() - timeStart)*1000:.02f} ms")

    def _readValues(self, filebytes):
        """Decode the data section into a (stepCount, fftHeight) array."""

        # values are decoded as little-endian doubles, so any other width
        # cannot be interpreted
        if self.bytesPerValue != 8:
            raise ValueError(
                f"unsupported bytes per value: {self.bytesPerValue}")
        if self.valuesPerPoint < 1:
            raise ValueError(
                f"unsupported values per point: {self.valuesPerPoint}")
        if self.stepCount < 0 or self.fftHeight < 0 or self.firstDataByte < 0:
            raise ValueError("invalid file format (negative size or offset)")

        pointCount = self.stepCount * self.fftHeight
        valueCount = pointCount * self.valuesPerPoint
        lastDataByte = self.firstDataByte + valueCount * self.bytesPerValue
        if len(filebytes) < lastDataByte:
            raise ValueError("invalid file format (truncated data)")

        dataShape = (self.stepCount, self.fftHeight)

        if self.isComplex:
            # each point is a (real, imaginary) pair of doubles, which is
            # exactly the memory layout of a little-endian complex128
            points = np.frombuffer(filebytes, dtype="<c16",
                                   count=pointCount,
                                   offset=self.firstDataByte)
            # astype() copies, giving a writable native-endian array
            return points.reshape(dataShape).astype(np.complex128)

        # only the first value of every point is used
        raw = np.frombuffer(filebytes, dtype="<f8",
                            count=valueCount,
                            offset=self.firstDataByte)
        raw = raw.reshape(self.stepCount, self.fftHeight, self.valuesPerPoint)
        return raw[:, :, 0].astype(np.float64)
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
using NUnit.Framework;
2+
using System;
3+
using System.Collections.Generic;
4+
using System.Text;
5+
6+
namespace Spectrogram.Tests
{
    /// <summary>
    /// Exercises saving spectrogram data to a Spectrogram File Format (SFF) file.
    /// </summary>
    class FileFormat
    {
        /// <summary>
        /// Builds a spectrogram from a WAV file and saves it as "hal.sff".
        /// The test makes no assertions; it passes when nothing throws.
        /// </summary>
        [Test]
        public void Test_Save_Format()
        {
            const int sampleRate = 44100;
            const int fftSize = 1 << 12;

            double[] samples = Read.WavInt16mono("../../../../../data/cant-do-that-44100.wav");

            var spectrogram = new Spectrogram(sampleRate, fftSize, stepSize: 700, maxFreq: 2000);

            // sharper window than typical
            spectrogram.SetWindow(FftSharp.Window.Hanning(fftSize / 3));

            spectrogram.Add(samples);
            spectrogram.SaveData("hal.sff");
        }
    }
}

src/Spectrogram/SFF.cs

Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Diagnostics;
4+
using System.IO;
5+
using System.Linq;
6+
using System.Text;
7+
8+
namespace Spectrogram
{
    /// <summary>
    /// Reads and writes the header of Spectrogram File Format (SFF) files.
    /// Load() parses a file's header and prints every field to the console
    /// (it does not modify this object). Save() writes a 256-byte header
    /// built from this object's fields, followed by placeholder complex
    /// data generated from the step and row indices.
    /// </summary>
    public class SFF
    {
        public readonly byte VersionMajor = 1;
        public readonly byte VersionMinor = 1;

        // bytes 0-3 of a valid file (211, 'S', 'F', 'F') read as an Int32
        private const int MagicNumber = 1179014099;

        // the last header field parsed by Load() ends at byte 84
        private const int MinimumHeaderLength = 84;

        // time information
        int SampleRate = 44100;
        int StepSize = 1024;
        int StepCount = 123;

        // frequency information
        int FftSize = 1024;
        int FftFirstIndex = 100;
        int FftHeight = 824;
        int OffsetHz = 0;

        public SFF()
        {

        }

        /// <summary>
        /// Parse the header of an SFF file and print its fields to the console.
        /// </summary>
        /// <param name="filePath">path of the SFF file to read</param>
        /// <exception cref="InvalidDataException">
        /// the file is too short to hold a header or has the wrong magic number
        /// </exception>
        public void Load(string filePath)
        {
            byte[] bytes = File.ReadAllBytes(filePath);

            // reject files too short to contain every header field read below
            // (otherwise BitConverter / indexing would throw unrelated exceptions)
            if (bytes.Length < MinimumHeaderLength)
                throw new InvalidDataException("not a valid SFF file");

            // ensure the first 4 bytes match what we expect
            int magicNumber = BitConverter.ToInt32(bytes, 0);
            if (magicNumber != MagicNumber)
                throw new InvalidDataException("not a valid SFF file");

            // read file version
            byte versionMajor = bytes[40];
            byte versionMinor = bytes[41];
            Console.WriteLine($"SFF version {versionMajor}.{versionMinor}");

            // read time information
            int sampleRate = BitConverter.ToInt32(bytes, 42);
            int stepSize = BitConverter.ToInt32(bytes, 46);
            int stepCount = BitConverter.ToInt32(bytes, 50);
            Console.WriteLine($"Sample rate: {sampleRate} Hz");
            Console.WriteLine($"Step size: {stepSize} samples");
            Console.WriteLine($"Step count: {stepCount} steps");

            // read frequency information
            int fftSize = BitConverter.ToInt32(bytes, 54);
            int fftFirstIndex = BitConverter.ToInt32(bytes, 58);
            int fftHeight = BitConverter.ToInt32(bytes, 62);
            int offsetHz = BitConverter.ToInt32(bytes, 66);
            Console.WriteLine($"FFT size: {fftSize}");
            Console.WriteLine($"FFT first index: {fftFirstIndex}");
            Console.WriteLine($"FFT height: {fftHeight}");
            Console.WriteLine($"FFT offset: {offsetHz} Hz");

            // data format
            byte valuesPerPoint = bytes[70];
            bool isComplex = valuesPerPoint == 2;
            byte bytesPerValue = bytes[71];
            bool decibels = bytes[72] == 1;
            Console.WriteLine($"Values per point: {valuesPerPoint}");
            Console.WriteLine($"Complex values: {isComplex}");
            Console.WriteLine($"Bytes per value: {bytesPerValue}");
            Console.WriteLine($"Decibels: {decibels}");

            // recording start time (2-digit year, month, day, hour, minute, second)
            DateTime dt = new DateTime(bytes[74] + 2000, bytes[75], bytes[76], bytes[77], bytes[78], bytes[79]);
            Console.WriteLine($"Recording start (UTC): {dt}");

            // data storage
            UInt32 firstDataByte = BitConverter.ToUInt32(bytes, 80);
            Console.WriteLine($"First data byte: {firstDataByte}");
        }

        /// <summary>
        /// Write an SFF file: a 256-byte header describing this object's
        /// fields, followed by StepCount x FftHeight complex points.
        /// The point at step x, row y is a placeholder: its real part is the
        /// number written "x.y" and its imaginary part is the negation of that.
        /// </summary>
        /// <param name="filePath">path of the SFF file to create or overwrite</param>
        /// <exception cref="InvalidDataException">
        /// the header signature or the text description is malformed
        /// </exception>
        public void Save(string filePath)
        {
            byte[] header = new byte[256];

            // file type designator
            header[0] = 211; // intentionally non-ASCII
            header[1] = (byte)'S';
            header[2] = (byte)'F';
            header[3] = (byte)'F';
            header[4] = (byte)'\r';
            header[5] = (byte)'\n';
            header[6] = (byte)' ';
            header[7] = (byte)'\n';

            int magicNumber = BitConverter.ToInt32(header, 0);
            if (magicNumber != MagicNumber)
                throw new InvalidDataException("magic number for SFF files is 1179014099");

            // plain text helpful for people who open this file in a text editor
            string fileInfo = $"Spectrogram File Format {VersionMajor}.{VersionMinor}\r\n";
            byte[] fileInfoBytes = Encoding.UTF8.GetBytes(fileInfo);
            if (fileInfoBytes.Length > 32)
                throw new InvalidDataException("file info cannot exceed 32 bytes");
            Array.Copy(fileInfoBytes, 0, header, 8, fileInfoBytes.Length);

            // version
            header[40] = VersionMajor;
            header[41] = VersionMinor;

            // time information
            WriteInt32(header, 42, SampleRate);
            WriteInt32(header, 46, StepSize);
            WriteInt32(header, 50, StepCount);

            // frequency information
            WriteInt32(header, 54, FftSize);
            WriteInt32(header, 58, FftFirstIndex);
            WriteInt32(header, 62, FftHeight);
            WriteInt32(header, 66, OffsetHz);

            // data encoding details
            byte valuesPerPoint = 2; // 1 for magnitude or power data, 2 for complex data
            byte bytesPerValue = 8; // a double is 8 bytes
            byte decibelUnits = 0; // 1 if units are in dB
            byte dataExtraByte = 0; // unused
            header[70] = valuesPerPoint;
            header[71] = bytesPerValue;
            header[72] = decibelUnits;
            header[73] = dataExtraByte;

            // date and time: read the clock once so all six fields describe
            // the same instant (separate reads could straddle a rollover)
            DateTime now = DateTime.UtcNow;
            header[74] = (byte)(now.Year - 2000); // 2-digit year
            header[75] = (byte)now.Month;
            header[76] = (byte)now.Day;
            header[77] = (byte)now.Hour;
            header[78] = (byte)now.Minute;
            header[79] = (byte)now.Second;

            // binary data location
            int firstDataByte = header.Length;
            WriteInt32(header, 80, firstDataByte);

            // create bytes to write to file
            int dataPointCount = FftHeight * StepCount;
            int bytesPerPoint = bytesPerValue * valuesPerPoint;
            byte[] fileBytes = new byte[header.Length + dataPointCount * bytesPerPoint];
            Array.Copy(header, 0, fileBytes, 0, header.Length);

            // copy data into byte area
            // (invariant culture so "x.y" parses identically on every machine,
            // including locales whose decimal separator is a comma)
            var invariant = System.Globalization.CultureInfo.InvariantCulture;
            int bytesPerColumn = FftHeight * bytesPerPoint;
            for (int x = 0; x < StepCount; x++)
            {
                int columnOffset = bytesPerColumn * x;
                for (int y = 0; y < FftHeight; y++)
                {
                    int rowOffset = y * bytesPerPoint;
                    int valueOffset = firstDataByte + columnOffset + rowOffset;
                    string valueText = x.ToString(invariant) + "." + y.ToString(invariant);
                    double value = double.Parse(valueText, invariant);
                    WriteDouble(fileBytes, valueOffset, value);
                    WriteDouble(fileBytes, valueOffset + 8, -value);
                }
            }

            // write file to disk
            File.WriteAllBytes(filePath, fileBytes);
        }

        // Copy the 4 bytes of an Int32 into the buffer at the given offset.
        private static void WriteInt32(byte[] buffer, int offset, int value)
        {
            Array.Copy(BitConverter.GetBytes(value), 0, buffer, offset, 4);
        }

        // Copy the 8 bytes of a double into the buffer at the given offset.
        private static void WriteDouble(byte[] buffer, int offset, double value)
        {
            Array.Copy(BitConverter.GetBytes(value), 0, buffer, offset, 8);
        }
    }
}

0 commit comments

Comments
 (0)