Skip to content

Commit cad5488

Browse files
committed
Speed up SSVC tree collection
Signed-off-by: Tushar Goel <tushar.goel.dav@gmail.com>
1 parent dc8a2ab commit cad5488

1 file changed

Lines changed: 100 additions & 26 deletions

File tree

vulnerabilities/pipelines/v2_improvers/collect_ssvc_trees.py

Lines changed: 100 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
#
99

1010
import logging
11+
from collections import defaultdict
1112

1213
from django.db.models import Prefetch
1314
from django.db.models import Q
@@ -35,53 +36,126 @@ def steps(cls):
3536
return (cls.collect_ssvc_data,)
3637

3738
def collect_ssvc_data(self):
    """
    Build or refresh the SSVC record of every latest advisory that carries an
    SSVC severity, then link each record to its related advisories.

    An advisory is "related" to a source advisory when its ``advisory_id``
    equals the source's ``advisory_id`` or when one of its aliases does; the
    source advisory itself is never linked to its own record.

    Everything is done in bulk: one query for the source advisories (plus the
    severity prefetch), one for the related advisories (plus the alias
    prefetch), and one bulk statement per batch for creates, updates and
    relation rows.

    Exactly one SSVC record is kept per source advisory. When an advisory has
    several SSVC severities, the last vector that parses successfully wins.
    Vectors that fail to parse are logged and skipped.
    """
    ssvc_system = SCORING_SYSTEMS["ssvc"]

    # Only the columns read below are loaded; the prefetch is restricted to
    # SSVC severities so ``advisory.severities.all()`` needs no extra filter.
    ssvc_severities = AdvisorySeverity.objects.filter(scoring_system=ssvc_system).only(
        "id", "scoring_elements"
    )
    advisories = list(
        AdvisoryV2.objects.latest_per_avid()
        .filter(severities__scoring_system=ssvc_system)
        .prefetch_related(Prefetch("severities", queryset=ssvc_severities))
        .only("id", "advisory_id")
        .distinct()
    )

    self.log(f"Found {len(advisories)} advisories from Vulnrichment with SSVC severities.")
    if not advisories:
        # Nothing to do; skip the follow-up queries entirely.
        return

    advisory_pks = [advisory.id for advisory in advisories]
    advisory_ids = {advisory.advisory_id for advisory in advisories}

    # Fetch every candidate related advisory in a single query instead of one
    # query per source advisory.
    related_advisories = (
        AdvisoryV2.objects.filter(
            Q(advisory_id__in=advisory_ids) | Q(aliases__alias__in=advisory_ids)
        )
        .distinct()
        .only("id", "advisory_id")
        .prefetch_related("aliases")
    )

    # Source ``advisory_id`` -> primary keys of the advisories that match it by
    # their own ``advisory_id`` or by one of their aliases.
    related_pks_by_advisory_id = defaultdict(set)
    for related in related_advisories:
        identifiers = {alias.alias for alias in related.aliases.all()}
        identifiers.add(related.advisory_id)
        # Only identifiers of source advisories are ever looked up later.
        for identifier in identifiers & advisory_ids:
            related_pks_by_advisory_id[identifier].add(related.id)

    existing_ssvc = {
        ssvc.source_advisory_id: ssvc
        for ssvc in SSVC.objects.filter(source_advisory_id__in=advisory_pks)
    }

    to_create = []
    to_update = []
    # Advisory primary key -> SSVC object already queued in one of the lists
    # above. This guarantees each advisory is queued at most once even when it
    # has several SSVC severities.
    queued = {}

    for advisory in advisories:
        self.log(f"Processing advisory: {advisory.advisory_id}")

        for severity in advisory.severities.all():
            ssvc_vector = severity.scoring_elements

            # Keep the ``try`` limited to the parsing call so that unrelated
            # programming errors are not reported as parse failures.
            try:
                ssvc_tree, decision = convert_vector_to_tree_and_decision(ssvc_vector)
            except Exception as e:
                logger.error(
                    f"Failed to parse SSVC vector '{ssvc_vector}' "
                    f"for advisory '{advisory}': {e}"
                )
                continue

            ssvc_obj = queued.get(advisory.id)
            if ssvc_obj is None:
                ssvc_obj = existing_ssvc.get(advisory.id)
                if ssvc_obj is None:
                    ssvc_obj = SSVC(source_advisory=advisory)
                    to_create.append(ssvc_obj)
                else:
                    to_update.append(ssvc_obj)
                queued[advisory.id] = ssvc_obj

            ssvc_obj.options = ssvc_tree
            ssvc_obj.decision = decision
            ssvc_obj.vector = ssvc_vector

    SSVC.objects.bulk_create(to_create, batch_size=1000)
    SSVC.objects.bulk_update(
        to_update,
        ["options", "decision", "vector"],
        batch_size=1000,
    )

    # Re-read the primary keys from the database: ``bulk_create`` does not
    # populate them on the in-memory objects on every database backend.
    ssvc_pk_by_advisory_pk = dict(
        SSVC.objects.filter(source_advisory_id__in=advisory_pks).values_list(
            "source_advisory_id", "id"
        )
    )

    through_model = SSVC.related_advisories.through
    through_rows = []

    for advisory in advisories:
        ssvc_pk = ssvc_pk_by_advisory_pk.get(advisory.id)
        if ssvc_pk is None:
            # No vector of this advisory could be parsed, so it has no record.
            continue

        for related_pk in related_pks_by_advisory_id.get(advisory.advisory_id, ()):
            if related_pk == advisory.id:
                continue
            through_rows.append(
                through_model(
                    ssvc_id=ssvc_pk,
                    advisoryv2_id=related_pk,
                )
            )

    # NOTE(review): links are only ever added here (conflicting rows are
    # ignored); relations that are no longer valid are not removed -- confirm
    # that additive behaviour is intended.
    through_model.objects.bulk_create(
        through_rows,
        ignore_conflicts=True,
        batch_size=5000,
    )
85159

86160

87161
REVERSE_POINTS = {

0 commit comments

Comments
 (0)