Skip to content

Commit 0b2d527

Browse files
Merge branch 'main' into remove_numba_pin
2 parents 35a4bff + b5fcb18 commit 0b2d527

File tree

6 files changed

+189
-106
lines changed

6 files changed

+189
-106
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,10 @@ All notable changes to this project will be documented in this file.
66
### Changed
77
- Added improved handling of a few warning messages from h5py and numpy.
88

9+
### Fixed
10+
- Significant reduction in memory when reading UVFITS files, particularly when
11+
reading the entire file (no selection on read).
12+
913
## [3.2.5] - 2025-12-11
1014

1115
### Changed

src/pyuvdata/parameter.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1183,7 +1183,7 @@ def on_moon(self):
11831183
"""Detect if this location is on the moon."""
11841184
# figure out if this is on the moon. Doing it this way limits
11851185
# attempted imports of lunarsky
1186-
if isinstance(self.value, EarthLocation):
1186+
if self.value is None or isinstance(self.value, EarthLocation):
11871187
return False
11881188
else:
11891189
try:

src/pyuvdata/uvcal/ms_cal.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,9 @@ def read_ms_cal(
281281
)
282282
if any(close_check):
283283
# Fill in the first closest entry matched
284-
time_dict[time] = np.where(close_check)[0][0]
284+
time_dict[time] = time_dict[
285+
list(time_dict)[np.where(close_check)[0][0]]
286+
]
285287
else:
286288
# Otherwise, plug in a new entry
287289
time_dict[time] = time_count

src/pyuvdata/uvdata/mwa_corr_fits.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ def read_metafits(
4848
telescope_info_only=False,
4949
):
5050
# get information from metafits file
51-
with fits.open(file, memmap=True) as meta:
51+
with fits.open(file, memmap=False) as meta:
5252
meta_hdr = meta[0].header
5353

5454
telescope_name = meta_hdr.pop("TELESCOP")
@@ -734,6 +734,7 @@ def _read_fits_file(
734734
# map file number to frequency index
735735
freq_ind = coarse_ind * num_fine_chans
736736

737+
need_memmap = False
737738
if freq_inds is not None:
738739
# check that we want to read this file
739740
this_file_f_inds = np.arange(num_fine_chans) + freq_ind
@@ -767,11 +768,17 @@ def _read_fits_file(
767768
* (len(self.telescope.antenna_numbers) + 1)
768769
/ 2.0
769770
)
771+
if self.Nbls != n_orig_bls:
772+
need_memmap = True
770773
bl_frac = self.Nbls / n_orig_bls
771774

772775
n_orig_pols = 4
776+
if self.Npols != n_orig_pols:
777+
need_memmap = True
773778
pol_frac = self.Npols / n_orig_pols
774779

780+
if n_freq_read != num_fine_chans:
781+
need_memmap = True
775782
freq_frac = n_freq_read / num_fine_chans
776783

777784
blpol_frac = bl_frac * pol_frac
@@ -801,7 +808,7 @@ def _read_fits_file(
801808
axis=1,
802809
).flatten()
803810

804-
with fits.open(filename, mode="denywrite") as hdu_list:
811+
with fits.open(filename, mode="denywrite", memmap=need_memmap) as hdu_list:
805812
# if mwax, data is in every other hdu
806813
if mwax:
807814
hdu_list = hdu_list[1::2]
@@ -927,7 +934,7 @@ def _read_flag_file(self, filename, file_nums, num_fine_chans):
927934
flag_num = int(filename.split("_")[-1][0:2])
928935
# map file number to frequency index
929936
freq_ind = np.where(file_nums == flag_num)[0][0] * num_fine_chans
930-
with fits.open(filename, mode="denywrite") as aoflags:
937+
with fits.open(filename, mode="denywrite", memmap=False) as aoflags:
931938
flags = aoflags[1].data.field("FLAGS")
932939
# some flag files are longer than data; crop the end
933940
flags = flags[: self.Nblts, :]
@@ -1582,7 +1589,7 @@ def read_mwa_corr_fits(
15821589
raise ValueError("multiple metafits files in filelist")
15831590
metafits_file = filename
15841591
elif filename.lower().endswith(".fits"):
1585-
with fits.open(filename, memmap=True) as hdu_list:
1592+
with fits.open(filename, memmap=False) as hdu_list:
15861593
hdunames = fits_utils._indexhdus(hdu_list)
15871594
if "PPDS" in hdunames:
15881595
ppds_file = filename

src/pyuvdata/uvdata/uvfits.py

Lines changed: 132 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
"""Class for reading and writing uvfits files."""
55

66
import copy
7+
import gc
78
import os
89
import warnings
910

@@ -197,7 +198,7 @@ def _get_parameter_data(
197198

198199
def _get_data(
199200
self,
200-
vis_hdu,
201+
filename,
201202
*,
202203
antenna_nums,
203204
antenna_names,
@@ -269,17 +270,38 @@ def _get_data(
269270
)
270271

271272
if min_frac == 1:
272-
# no select, read in all the data
273-
if vis_hdu.header["NAXIS"] == 7:
274-
raw_data_array = vis_hdu.data.data[:, 0, 0, :, :, :, :]
275-
if self.Nspws != raw_data_array.shape[1]: # pragma: no cover
276-
raise RuntimeError(bad_data_shape_msg)
273+
# no select, read in all the data. Don't need memmap
274+
with fits.open(filename, memmap=False) as hdu_list:
275+
vis_hdu = hdu_list[0] # assumes the visibilities are in the primary hdu
276+
if vis_hdu.header["NAXIS"] == 7:
277+
self.data_array = vis_hdu.data.data[:, 0, 0]
278+
if (
279+
self.Nspws != self.data_array.shape[1]
280+
or len(self.data_array.shape) != 5
281+
): # pragma: no cover
282+
raise RuntimeError(bad_data_shape_msg)
283+
284+
# Reshape the data array to combine the spw & freq axes
285+
self.data_array = np.reshape(
286+
self.data_array,
287+
(self.Nblts, self.Nfreqs, self.Npols, self.data_array.shape[4]),
288+
)
289+
290+
else:
291+
self.data_array = vis_hdu.data.data[:, 0, 0, :, :, :]
292+
293+
if len(self.data_array.shape) != 4: # pragma: no cover
294+
raise RuntimeError(bad_data_shape_msg)
295+
296+
self.flag_array = self.data_array[:, :, :, 2] <= 0
297+
self.nsample_array = np.abs(self.data_array[:, :, :, 2])
298+
299+
# FITS uvw direction convention is opposite ours and Miriad's.
300+
# So conjugate the visibilities and flip the uvws:
301+
self.data_array = (
302+
self.data_array[:, :, :, 0] - 1j * self.data_array[:, :, :, 1]
303+
)
277304

278-
else:
279-
# in many uvfits files the spw axis is left out,
280-
# here we put it back in so the dimensionality stays the same
281-
raw_data_array = vis_hdu.data.data[:, 0, 0, :, :, :]
282-
raw_data_array = raw_data_array[:, np.newaxis, :, :]
283305
else:
284306
# do select operations on everything except data_array, flag_array
285307
# and nsample_array
@@ -293,72 +315,80 @@ def _get_data(
293315
)
294316

295317
# just read in the right portions of the data and flag arrays
296-
if blt_frac == min_frac:
297-
if vis_hdu.header["NAXIS"] == 7:
298-
raw_data_array = vis_hdu.data.data[blt_inds, :, :, :, :, :, :]
299-
raw_data_array = raw_data_array[:, 0, 0, :, :, :, :]
300-
if self.Nspws != raw_data_array.shape[1]: # pragma: no cover
301-
raise RuntimeError(bad_data_shape_msg)
302-
else:
303-
# in many uvfits files the spw axis is left out,
304-
# here we put it back in so the dimensionality stays the same
305-
raw_data_array = vis_hdu.data.data[blt_inds, :, :, :, :, :]
306-
raw_data_array = raw_data_array[:, 0, 0, :, :, :]
307-
raw_data_array = raw_data_array[:, np.newaxis, :, :, :]
308-
if freq_frac < 1:
309-
raw_data_array = raw_data_array[:, :, freq_inds, :, :]
310-
if pol_frac < 1:
311-
raw_data_array = raw_data_array[:, :, :, pol_inds, :]
312-
elif freq_frac == min_frac:
313-
if vis_hdu.header["NAXIS"] == 7:
314-
raw_data_array = vis_hdu.data.data[:, :, :, :, freq_inds, :, :]
315-
raw_data_array = raw_data_array[:, 0, 0, :, :, :, :]
316-
if self.Nspws != raw_data_array.shape[1]: # pragma: no cover
317-
raise RuntimeError(bad_data_shape_msg)
318-
else:
319-
# in many uvfits files the spw axis is left out,
320-
# here we put it back in so the dimensionality stays the same
321-
raw_data_array = vis_hdu.data.data[:, :, :, freq_inds, :, :]
322-
raw_data_array = raw_data_array[:, 0, 0, :, :, :]
323-
raw_data_array = raw_data_array[:, np.newaxis, :, :, :]
324-
325-
if blt_frac < 1:
326-
raw_data_array = raw_data_array[blt_inds, :, :, :, :]
327-
if pol_frac < 1:
328-
raw_data_array = raw_data_array[:, :, :, pol_inds, :]
329-
else:
330-
if vis_hdu.header["NAXIS"] == 7:
331-
raw_data_array = vis_hdu.data.data[:, :, :, :, :, pol_inds, :]
332-
raw_data_array = raw_data_array[:, 0, 0, :, :, :, :]
333-
if self.Nspws != raw_data_array.shape[1]: # pragma: no cover
334-
raise RuntimeError(bad_data_shape_msg)
318+
# need memmap for this, accept the memory hit
319+
with fits.open(filename, memmap=False) as hdu_list:
320+
vis_hdu = hdu_list[0] # assumes the visibilities are in the primary hdu
321+
if blt_frac == min_frac:
322+
if vis_hdu.header["NAXIS"] == 7:
323+
raw_data_array = vis_hdu.data.data[blt_inds, :, :, :, :, :, :]
324+
raw_data_array = raw_data_array[:, 0, 0, :, :, :, :]
325+
if self.Nspws != raw_data_array.shape[1]: # pragma: no cover
326+
raise RuntimeError(bad_data_shape_msg)
327+
else:
328+
# in many uvfits files the spw axis is left out,
329+
# here we put it back in so the dimensionality stays the same
330+
raw_data_array = vis_hdu.data.data[blt_inds, :, :, :, :, :]
331+
raw_data_array = raw_data_array[:, 0, 0, :, :, :]
332+
raw_data_array = raw_data_array[:, np.newaxis, :, :, :]
333+
if freq_frac < 1:
334+
raw_data_array = raw_data_array[:, :, freq_inds, :, :]
335+
if pol_frac < 1:
336+
raw_data_array = raw_data_array[:, :, :, pol_inds, :]
337+
elif freq_frac == min_frac:
338+
if vis_hdu.header["NAXIS"] == 7:
339+
raw_data_array = vis_hdu.data.data[:, :, :, :, freq_inds, :, :]
340+
raw_data_array = raw_data_array[:, 0, 0, :, :, :, :]
341+
if self.Nspws != raw_data_array.shape[1]: # pragma: no cover
342+
raise RuntimeError(bad_data_shape_msg)
343+
else:
344+
# in many uvfits files the spw axis is left out,
345+
# here we put it back in so the dimensionality stays the same
346+
raw_data_array = vis_hdu.data.data[:, :, :, freq_inds, :, :]
347+
raw_data_array = raw_data_array[:, 0, 0, :, :, :]
348+
raw_data_array = raw_data_array[:, np.newaxis, :, :, :]
349+
350+
if blt_frac < 1:
351+
raw_data_array = raw_data_array[blt_inds, :, :, :, :]
352+
if pol_frac < 1:
353+
raw_data_array = raw_data_array[:, :, :, pol_inds, :]
335354
else:
336-
# in many uvfits files the spw axis is left out,
337-
# here we put it back in so the dimensionality stays the same
338-
raw_data_array = vis_hdu.data.data[:, :, :, :, pol_inds, :]
339-
raw_data_array = raw_data_array[:, 0, 0, :, :, :]
340-
raw_data_array = raw_data_array[:, np.newaxis, :, :, :]
341-
342-
if blt_frac < 1:
343-
raw_data_array = raw_data_array[blt_inds, :, :, :, :]
344-
if freq_frac < 1:
345-
raw_data_array = raw_data_array[:, :, freq_inds, :, :]
346-
347-
if len(raw_data_array.shape) != 5: # pragma: no cover
348-
raise RuntimeError(bad_data_shape_msg)
349-
350-
# Reshape the data array to be the right size if we are working w/ multiple
351-
# spectral windows
352-
raw_data_array = np.reshape(
353-
raw_data_array,
354-
(self.Nblts, self.Nfreqs, self.Npols, raw_data_array.shape[4]),
355-
)
355+
if vis_hdu.header["NAXIS"] == 7:
356+
raw_data_array = vis_hdu.data.data[:, :, :, :, :, pol_inds, :]
357+
raw_data_array = raw_data_array[:, 0, 0, :, :, :, :]
358+
if self.Nspws != raw_data_array.shape[1]: # pragma: no cover
359+
raise RuntimeError(bad_data_shape_msg)
360+
else:
361+
# in many uvfits files the spw axis is left out,
362+
# here we put it back in so the dimensionality stays the same
363+
raw_data_array = vis_hdu.data.data[:, :, :, :, pol_inds, :]
364+
raw_data_array = raw_data_array[:, 0, 0, :, :, :]
365+
raw_data_array = raw_data_array[:, np.newaxis, :, :, :]
366+
367+
if blt_frac < 1:
368+
raw_data_array = raw_data_array[blt_inds, :, :, :, :]
369+
if freq_frac < 1:
370+
raw_data_array = raw_data_array[:, :, freq_inds, :, :]
371+
372+
if len(raw_data_array.shape) != 5: # pragma: no cover
373+
raise RuntimeError(bad_data_shape_msg)
374+
375+
# Reshape the data array to be the right size if we are working w/ multiple
376+
# spectral windows
377+
raw_data_array = np.reshape(
378+
raw_data_array,
379+
(self.Nblts, self.Nfreqs, self.Npols, raw_data_array.shape[4]),
380+
)
356381

357-
# FITS uvw direction convention is opposite ours and Miriad's.
358-
# So conjugate the visibilities and flip the uvws:
359-
self.data_array = raw_data_array[:, :, :, 0] - 1j * raw_data_array[:, :, :, 1]
360-
self.flag_array = raw_data_array[:, :, :, 2] <= 0
361-
self.nsample_array = np.abs(raw_data_array[:, :, :, 2])
382+
# FITS uvw direction convention is opposite ours and Miriad's.
383+
# So conjugate the visibilities and flip the uvws:
384+
self.data_array = (
385+
raw_data_array[:, :, :, 0] - 1j * raw_data_array[:, :, :, 1]
386+
)
387+
self.flag_array = raw_data_array[:, :, :, 2] <= 0
388+
self.nsample_array = np.abs(raw_data_array[:, :, :, 2])
389+
390+
del raw_data_array
391+
gc.collect()
362392

363393
if fix_old_proj:
364394
self.fix_phase(use_ant_pos=fix_use_ant_pos)
@@ -403,7 +433,9 @@ def read_uvfits(
403433
self.filename = [basename]
404434
self._filename.form = (1,)
405435

406-
with fits.open(filename, memmap=True) as hdu_list:
436+
# memmap=True uses a lot of memory that doesn't get deallocated quickly
437+
# not needed for reading the headers, which is what we're doing here
438+
with fits.open(filename, memmap=False) as hdu_list:
407439
vis_hdu = hdu_list[0] # assumes the visibilities are in the primary hdu
408440
vis_hdr = vis_hdu.header.copy()
409441
hdunames = fits_utils._indexhdus(hdu_list) # find the rest of the tables
@@ -642,7 +674,7 @@ def read_uvfits(
642674
# the array center, but in a rotated ECEF frame so that the x-axis
643675
# goes through the local meridian.
644676
rot_ecef_positions = ant_hdu.data.field("STABXYZ")
645-
_, longitude, altitude = utils.LatLonAlt_from_XYZ(
677+
_, longitude, _ = utils.LatLonAlt_from_XYZ(
646678
np.array([x_telescope, y_telescope, z_telescope]),
647679
frame=telescope_frame,
648680
ellipsoid=ellipsoid,
@@ -825,29 +857,29 @@ def read_uvfits(
825857
rot_axis=0,
826858
)[:, :, 0]
827859

828-
if read_data:
829-
# Now read in the data
830-
self._get_data(
831-
vis_hdu,
832-
antenna_nums=antenna_nums,
833-
antenna_names=antenna_names,
834-
ant_str=ant_str,
835-
bls=bls,
836-
frequencies=frequencies,
837-
freq_chans=freq_chans,
838-
spws=spws,
839-
times=times,
840-
time_range=time_range,
841-
lsts=lsts,
842-
lst_range=lst_range,
843-
polarizations=polarizations,
844-
blt_inds=blt_inds,
845-
phase_center_ids=phase_center_ids,
846-
catalog_names=catalog_names,
847-
keep_all_metadata=keep_all_metadata,
848-
fix_old_proj=fix_old_proj,
849-
fix_use_ant_pos=fix_use_ant_pos,
850-
)
860+
if read_data:
861+
# Now read in the data
862+
self._get_data(
863+
filename,
864+
antenna_nums=antenna_nums,
865+
antenna_names=antenna_names,
866+
ant_str=ant_str,
867+
bls=bls,
868+
frequencies=frequencies,
869+
freq_chans=freq_chans,
870+
spws=spws,
871+
times=times,
872+
time_range=time_range,
873+
lsts=lsts,
874+
lst_range=lst_range,
875+
polarizations=polarizations,
876+
blt_inds=blt_inds,
877+
phase_center_ids=phase_center_ids,
878+
catalog_names=catalog_names,
879+
keep_all_metadata=keep_all_metadata,
880+
fix_old_proj=fix_old_proj,
881+
fix_use_ant_pos=fix_use_ant_pos,
882+
)
851883

852884
# check if object has all required UVParameters set
853885
if run_check:

0 commit comments

Comments
 (0)