Skip to content

Commit a0bc0a1

Browse files
committed
perf(previous_high_low): vectorize for ~80x speedup
1 parent f06378b commit a0bc0a1

File tree

1 file changed

+74
-47
lines changed

1 file changed

+74
-47
lines changed

smartmoneyconcepts/smc.py

Lines changed: 74 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -698,7 +698,7 @@ def liquidity(cls, ohlc: DataFrame, swing_highs_lows: DataFrame, range_percent:
698698
return pd.concat([liq_series, level_series, end_series, swept_series], axis=1)
699699

700700
@classmethod
701-
def previous_high_low(cls, ohlc: DataFrame, time_frame: str = "1D") -> Series:
701+
def previous_high_low(cls, ohlc: DataFrame, time_frame: str = "1D") -> DataFrame:
702702
"""
703703
Previous High Low
704704
This method returns the previous high and low of the given time frame.
@@ -712,56 +712,83 @@ def previous_high_low(cls, ohlc: DataFrame, time_frame: str = "1D") -> Series:
712712
BrokenHigh = 1 once price has broken the previous high of the timeframe, 0 otherwise
713713
BrokenLow = 1 once price has broken the previous low of the timeframe, 0 otherwise
714714
"""
715-
715+
ohlc = ohlc.copy()
716716
ohlc.index = pd.to_datetime(ohlc.index)
717+
n = len(ohlc)
717718

718-
previous_high = np.zeros(len(ohlc), dtype=np.float32)
719-
previous_low = np.zeros(len(ohlc), dtype=np.float32)
720-
broken_high = np.zeros(len(ohlc), dtype=np.int32)
721-
broken_low = np.zeros(len(ohlc), dtype=np.int32)
722-
723-
resampled_ohlc = ohlc.resample(time_frame).agg(
724-
{
725-
"open": "first",
726-
"high": "max",
727-
"low": "min",
728-
"close": "last",
729-
"volume": "sum",
730-
}
731-
).dropna()
732-
733-
currently_broken_high = False
734-
currently_broken_low = False
735-
last_broken_time = None
736-
for i in range(len(ohlc)):
737-
resampled_previous_index = np.where(
738-
resampled_ohlc.index < ohlc.index[i]
739-
)[0]
740-
if len(resampled_previous_index) <= 1:
741-
previous_high[i] = np.nan
742-
previous_low[i] = np.nan
743-
continue
744-
resampled_previous_index = resampled_previous_index[-2]
745-
746-
if last_broken_time != resampled_previous_index:
747-
currently_broken_high = False
748-
currently_broken_low = False
749-
last_broken_time = resampled_previous_index
750-
751-
previous_high[i] = resampled_ohlc["high"].iloc[resampled_previous_index]
752-
previous_low[i] = resampled_ohlc["low"].iloc[resampled_previous_index]
753-
currently_broken_high = ohlc["high"].iloc[i] > previous_high[i] or currently_broken_high
754-
currently_broken_low = ohlc["low"].iloc[i] < previous_low[i] or currently_broken_low
755-
broken_high[i] = 1 if currently_broken_high else 0
756-
broken_low[i] = 1 if currently_broken_low else 0
757-
758-
previous_high = pd.Series(previous_high, name="PreviousHigh")
759-
previous_low = pd.Series(previous_low, name="PreviousLow")
760-
broken_high = pd.Series(broken_high, name="BrokenHigh")
761-
broken_low = pd.Series(broken_low, name="BrokenLow")
719+
# Resample to target timeframe
720+
resampled = ohlc.resample(time_frame).agg({
721+
"open": "first",
722+
"high": "max",
723+
"low": "min",
724+
"close": "last",
725+
"volume": "sum"
726+
}).dropna()
727+
728+
# Edge case: not enough resampled periods
729+
if len(resampled) < 2:
730+
return pd.concat([
731+
pd.Series(np.full(n, np.nan, dtype=np.float32), name="PreviousHigh"),
732+
pd.Series(np.full(n, np.nan, dtype=np.float32), name="PreviousLow"),
733+
pd.Series(np.zeros(n, dtype=np.int32), name="BrokenHigh"),
734+
pd.Series(np.zeros(n, dtype=np.int32), name="BrokenLow"),
735+
], axis=1)
736+
737+
resampled_times = resampled.index.values
738+
resampled_highs = resampled["high"].values
739+
resampled_lows = resampled["low"].values
740+
candle_times = ohlc.index.values
741+
742+
# For each candle, find how many resampled periods have start time < candle time
743+
# This is equivalent to: len(np.where(resampled_times < candle_time)[0])
744+
periods_before = np.searchsorted(resampled_times, candle_times, side='left')
745+
746+
# Original takes second-to-last: indices[-2] = periods_before - 2
747+
prev_period_idx = periods_before - 2
748+
749+
# Valid only when at least 2 resampled periods start before the candle (the original loop skipped the candle when len <= 1)
750+
valid_mask = periods_before > 1
751+
752+
# Initialize output arrays
753+
previous_high = np.full(n, np.nan, dtype=np.float32)
754+
previous_low = np.full(n, np.nan, dtype=np.float32)
755+
756+
# Fill valid entries
757+
valid_indices = np.where(valid_mask)[0]
758+
if len(valid_indices) > 0:
759+
lookup_indices = prev_period_idx[valid_indices]
760+
previous_high[valid_indices] = resampled_highs[lookup_indices]
761+
previous_low[valid_indices] = resampled_lows[lookup_indices]
762+
763+
# Group candles by their reference period for cumulative broken tracking
764+
# Original resets broken flags when the reference period changes
765+
group_changes = np.concatenate([[True], prev_period_idx[1:] != prev_period_idx[:-1]])
766+
group_id = np.cumsum(group_changes)
762767

763-
return pd.concat([previous_high, previous_low, broken_high, broken_low], axis=1)
768+
ohlc_high = ohlc["high"].values
769+
ohlc_low = ohlc["low"].values
764770

771+
# Compute cumulative max/min within each group
772+
df_temp = pd.DataFrame({
773+
'group': group_id,
774+
'high': ohlc_high,
775+
'low': ohlc_low,
776+
})
777+
778+
cummax_high = df_temp.groupby('group')['high'].cummax().values
779+
cummin_low = df_temp.groupby('group')['low'].cummin().values
780+
781+
# Broken = 1 if cumulative high > previous_high (or cummin < previous_low)
782+
broken_high = np.where(valid_mask & (cummax_high > previous_high), 1, 0).astype(np.int32)
783+
broken_low = np.where(valid_mask & (cummin_low < previous_low), 1, 0).astype(np.int32)
784+
785+
return pd.concat([
786+
pd.Series(previous_high, name="PreviousHigh"),
787+
pd.Series(previous_low, name="PreviousLow"),
788+
pd.Series(broken_high, name="BrokenHigh"),
789+
pd.Series(broken_low, name="BrokenLow"),
790+
], axis=1)
791+
765792
@classmethod
766793
def sessions(
767794
cls,

0 commit comments

Comments
 (0)