diff --git a/mavisp/modules.py b/mavisp/modules.py index 8ebd5b3..d0a93c3 100644 --- a/mavisp/modules.py +++ b/mavisp/modules.py @@ -1820,16 +1820,22 @@ def ingest(self, mutations): try: popeve_df = pd.read_csv(os.path.join(self.data_dir, self.module_dir, popeve_file), - usecols=['mutant', 'popEVE'], + usecols=['mutant', 'popEVE', 'gap frequency'], dtype={ 'mutant' : 'string', - 'popEVE' : 'float32'}, + 'popEVE' : 'float32', + 'gap frequency' :'float32'}, index_col='mutant') except Exception as e: this_error = f"Exception {type(e).__name__} occurred when parsing the csv files. Arguments:{e.args}" raise MAVISpMultipleError(warning=warnings, critical=[MAVISpCriticalError(this_error)]) + + popeve_df.loc[ + popeve_df['gap frequency'] >= 0.5, + 'popEVE' + ] = pd.NA - self.data = popeve_df.rename(columns = {'popEVE' : 'popEVE score'}) + self.data = popeve_df.rename(columns = {'popEVE' : 'popEVE score'}).drop(columns = ['gap frequency']) if len(warnings) > 0: raise MAVISpMultipleError(warning=warnings, diff --git a/test_data/mavisp_web_server/dataset_info.csv b/test_data/mavisp_web_server/dataset_info.csv index 3c1cdca..d315b5a 100644 --- a/test_data/mavisp_web_server/dataset_info.csv +++ b/test_data/mavisp_web_server/dataset_info.csv @@ -1,2 +1,2 @@ Date of run,Number of mutations,Number of proteins,Number of proteins in simple mode only,Number of proteins in ensemble mode only,Number of proteins in both modes -2026-04-28,3781,1,0,0,1 +2026-05-12,3781,1,0,0,1