@@ -132,7 +132,10 @@ class SensorDataset:
132132 def __init__ (self , filename : str , verbose : bool = False ):
133133 self .filename = filename
134134 self .verbose = verbose
135- self .parse_result : Dict [int , List ] = defaultdict (list )
135+ self .parse_result : parser .ParseResult = parser .ParseResult (
136+ sensor_dfs = {},
137+ mic_samples = [],
138+ )
136139 # Per-SID dataframes built in _build_accessors
137140 self .sensor_dfs : Dict [int , pd .DataFrame ] = {}
138141 self .audio_stereo : Optional [np .ndarray ] = None
@@ -141,101 +144,31 @@ def __init__(self, filename: str, verbose: bool = False):
141144 self .bone_sound : Optional [np .ndarray ] = None
142145 self .df : pd .DataFrame = pd .DataFrame ()
143146
144- self .imu = _SensorAccessor (pd .DataFrame (columns = LABELS ["imu" ]), LABELS ["imu" ])
145- self .barometer = _SensorAccessor (pd .DataFrame (columns = LABELS ["barometer" ]), LABELS ["barometer" ])
146- self .ppg = _SensorAccessor (pd .DataFrame (columns = LABELS ["ppg" ]), LABELS ["ppg" ])
147- self .bone_acc = _SensorAccessor (pd .DataFrame (columns = LABELS ["bone_acc" ]), LABELS ["bone_acc" ])
148- self .optical_temp = _SensorAccessor (pd .DataFrame (columns = LABELS ["optical_temp" ]), LABELS ["optical_temp" ])
149- self .microphone = _SensorAccessor (pd .DataFrame (columns = LABELS ["microphone" ]), LABELS ["microphone" ])
150-
151- self .parser : parser .Parser = parser .Parser ({
152- self .SENSOR_SID ["imu" ]: parser .SchemePayloadParser (scheme .SensorScheme (
153- name = 'imu' ,
154- sid = self .SENSOR_SID ["imu" ],
155- groups = [
156- scheme .SensorComponentGroupScheme (
157- name = 'acc' ,
158- components = [
159- scheme .SensorComponentScheme ('x' , scheme .ParseType .FLOAT ),
160- scheme .SensorComponentScheme ('y' , scheme .ParseType .FLOAT ),
161- scheme .SensorComponentScheme ('z' , scheme .ParseType .FLOAT ),
162- ]
163- ),
164- scheme .SensorComponentGroupScheme (
165- name = 'gyro' ,
166- components = [
167- scheme .SensorComponentScheme ('x' , scheme .ParseType .FLOAT ),
168- scheme .SensorComponentScheme ('y' , scheme .ParseType .FLOAT ),
169- scheme .SensorComponentScheme ('z' , scheme .ParseType .FLOAT ),
170- ]
171- ),
172- scheme .SensorComponentGroupScheme (
173- name = 'mag' ,
174- components = [
175- scheme .SensorComponentScheme ('x' , scheme .ParseType .FLOAT ),
176- scheme .SensorComponentScheme ('y' , scheme .ParseType .FLOAT ),
177- scheme .SensorComponentScheme ('z' , scheme .ParseType .FLOAT ),
178- ]
179- ),
180- ])),
181- self .SENSOR_SID ["barometer" ]: parser .SchemePayloadParser (scheme .SensorScheme (
182- name = 'barometer' ,
183- sid = self .SENSOR_SID ["barometer" ],
184- groups = [
185- scheme .SensorComponentGroupScheme (
186- name = 'barometer' ,
187- components = [
188- scheme .SensorComponentScheme ('temperature' , scheme .ParseType .FLOAT ),
189- scheme .SensorComponentScheme ('pressure' , scheme .ParseType .FLOAT ),
190- ]
191- ),
192- ])),
193- self .SENSOR_SID ["ppg" ]: parser .SchemePayloadParser (scheme .SensorScheme (
194- name = 'ppg' ,
195- sid = self .SENSOR_SID ["ppg" ],
196- groups = [
197- scheme .SensorComponentGroupScheme (
198- name = 'ppg' ,
199- components = [
200- scheme .SensorComponentScheme ('red' , scheme .ParseType .UINT32 ),
201- scheme .SensorComponentScheme ('ir' , scheme .ParseType .UINT32 ),
202- scheme .SensorComponentScheme ('green' , scheme .ParseType .UINT32 ),
203- scheme .SensorComponentScheme ('ambient' , scheme .ParseType .UINT32 ),
204- ]
205- ),
206- ])),
207- self .SENSOR_SID ["optical_temp" ]: parser .SchemePayloadParser (scheme .SensorScheme (
208- name = 'optical_temp' ,
209- sid = self .SENSOR_SID ["optical_temp" ],
210- groups = [
211- scheme .SensorComponentGroupScheme (
212- name = 'optical_temp' ,
213- components = [
214- scheme .SensorComponentScheme ('optical_temp' , scheme .ParseType .FLOAT ),
215- ]
216- ),
217- ])),
218- self .SENSOR_SID ["bone_acc" ]: parser .SchemePayloadParser (scheme .SensorScheme (
219- name = 'bone_acc' ,
220- sid = self .SENSOR_SID ["bone_acc" ],
221- groups = [
222- scheme .SensorComponentGroupScheme (
223- name = 'bone_acc' ,
224- components = [
225- scheme .SensorComponentScheme ('x' , scheme .ParseType .INT16 ),
226- scheme .SensorComponentScheme ('y' , scheme .ParseType .INT16 ),
227- scheme .SensorComponentScheme ('z' , scheme .ParseType .INT16 ),
228- ]
229- ),
230- ])),
231- self .SENSOR_SID ["microphone" ]: parser .MicPayloadParser (
232- sample_count = 48000 ,
233- ),
234- }, verbose = verbose )
147+ for sensor_name , labels in LABELS .items ():
148+ setattr (
149+ self ,
150+ sensor_name ,
151+ _SensorAccessor (pd .DataFrame (columns = labels ), labels ),
152+ )
153+
154+ self .parser : parser .Parser = self ._build_parser (verbose = verbose )
235155
236156 self .parse ()
237157 self ._build_accessors ()
238158
159+ @classmethod
160+ def _build_parser (cls , verbose : bool = False ) -> parser .Parser :
161+ sensor_schemes = scheme .build_default_sensor_schemes (cls .SENSOR_SID )
162+ dataset_parser = parser .Parser .from_sensor_schemes (
163+ sensor_schemes = sensor_schemes ,
164+ verbose = verbose ,
165+ )
166+ dataset_parser .parsers [cls .SENSOR_SID ["microphone" ]] = parser .MicPayloadParser (
167+ sample_count = 48000 ,
168+ verbose = verbose ,
169+ )
170+ return dataset_parser
171+
239172 def parse (self ) -> None :
240173 """Parse the binary recording file into structured sensor data."""
241174 with open (self .filename , "rb" ) as f :
@@ -252,10 +185,11 @@ def _build_accessors(self) -> None:
252185 self .audio_stereo = self .parse_result .audio_stereo
253186 self .audio_df = pd .DataFrame ()
254187 self ._audio_df_sampling_rate = None
188+ self .sensor_dfs = {}
255189
256190 data_dict = self .parse_result .sensor_dfs
257191 for name , sid in self .SENSOR_SID .items ():
258- labels = LABELS .get (name , [f"val { i } " for i in range ( 0 ) ])
192+ labels = LABELS .get (name , [])
259193 if name == "microphone" :
260194 df = self .get_audio_dataframe ()
261195 elif sid in data_dict and isinstance (data_dict [sid ], pd .DataFrame ):
@@ -348,10 +282,7 @@ def get_audio_dataframe(self, sampling_rate: int = 48000) -> pd.DataFrame:
348282 if sampling_rate <= 0 :
349283 raise ValueError (f"sampling_rate must be > 0, got { sampling_rate } " )
350284
351- if (
352- self ._audio_df_sampling_rate == sampling_rate
353- and not self .audio_df .empty
354- ):
285+ if self ._audio_df_sampling_rate == sampling_rate :
355286 return self .audio_df
356287
357288 mic_packets = getattr (self .parse_result , "mic_packets" , [])
@@ -362,27 +293,17 @@ def get_audio_dataframe(self, sampling_rate: int = 48000) -> pd.DataFrame:
362293 return self .audio_df
363294
364295 timestamps : List [np .ndarray ] = []
365- inner_values : List [np .ndarray ] = []
366- outer_values : List [np .ndarray ] = []
296+ stereo_frames : List [np .ndarray ] = []
367297
368298 for packet in mic_packets :
369- samples = np .asarray (packet ["samples" ], dtype = np .int16 )
370- if samples .size < 2 :
299+ ts , stereo = parser .mic_packet_to_stereo_frames (
300+ packet = packet ,
301+ sampling_rate = sampling_rate ,
302+ )
303+ if stereo .size == 0 :
371304 continue
372-
373- # Interleaved stream: [outer0, inner0, outer1, inner1, ...]
374- frame_count = samples .size // 2
375- trimmed = samples [: frame_count * 2 ]
376-
377- outer = trimmed [0 ::2 ]
378- inner = trimmed [1 ::2 ]
379-
380- start_ts = float (packet ["timestamp" ])
381- ts = start_ts + (np .arange (frame_count , dtype = np .float64 ) / sampling_rate )
382-
383305 timestamps .append (ts )
384- inner_values .append (inner )
385- outer_values .append (outer )
306+ stereo_frames .append (stereo )
386307
387308 if not timestamps :
388309 self .audio_df = pd .DataFrame (columns = ["mic.inner" , "mic.outer" ])
@@ -391,13 +312,12 @@ def get_audio_dataframe(self, sampling_rate: int = 48000) -> pd.DataFrame:
391312 return self .audio_df
392313
393314 all_ts = np .concatenate (timestamps )
394- all_inner = np .concatenate (inner_values )
395- all_outer = np .concatenate (outer_values )
315+ all_stereo = np .vstack (stereo_frames )
396316
397317 self .audio_df = pd .DataFrame (
398318 {
399- "mic.inner" : all_inner ,
400- "mic.outer" : all_outer ,
319+ "mic.inner" : all_stereo [:, 0 ] ,
320+ "mic.outer" : all_stereo [:, 1 ] ,
401321 },
402322 index = all_ts ,
403323 )
0 commit comments