Skip to content

Commit 23fd223

Browse files
committed
taskcluster - Bug 1644110 Add resource-monitor.json artifacts
1 parent 028f878 commit 23fd223

12 files changed

+33919
-4
lines changed

taskcluster/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
44

55
cmake_minimum_required(VERSION 3.0)
6-
project(taskcluster VERSION 0.0.24 LANGUAGES C)
6+
project(taskcluster VERSION 0.0.25 LANGUAGES C)
77
set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "Taskcluster Data Processing")
88
set(CPACK_DEBIAN_PACKAGE_DEPENDS "${PACKAGE_PREFIX}-amqp (>= 0.0.1), ${PACKAGE_PREFIX}-gcp (>= 0.0.7), ${PACKAGE_PREFIX}-heka (>= 1.1.9), ${PACKAGE_PREFIX}-rjson (>= 1.1.4), ${PACKAGE_PREFIX}-cjson (>= 2.1), ${PACKAGE_PREFIX}-gzfile (>= 0.0.2), ${PACKAGE_PREFIX}-postgres (>= 2.5.1), ${PACKAGE_PREFIX}-lyaml (>= 6.2.5)")
99
string(REGEX REPLACE "[()]" "" CPACK_RPM_PACKAGE_REQUIRES ${CPACK_DEBIAN_PACKAGE_DEPENDS})

taskcluster/io_modules/decoders/taskcluster/live_backing_log.lua

Lines changed: 53 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,12 @@ local artifact_list_schema = fh:read("*a")
9797
artifact_list_schema = rjson.parse_schema(artifact_list_schema)
9898
fh:close()
9999

100+
-- Load and parse the JSON schema that is handed to inject_validated_msg for
-- "resource_monitor" messages. The file handle `fh` is the one already used
-- for the schemas loaded above; assert aborts the load if the file is missing.
local resource_monitor_schema_file = cfg.taskcluster_schema_path .. "/resource_monitor.49.schema.json"
fh = assert(io.open(resource_monitor_schema_file, "r"))
local resource_monitor_schema = rjson.parse_schema(fh:read("*a"))
fh:close()

-- Artifact name that identifies the resource monitor output in an artifact list.
local resource_monitor_path = "public/monitoring/resource-monitor.json"
100106

101107
local function get_url(path)
102108
return base_tc_url .. "/" .. path
@@ -1075,6 +1081,30 @@ local function get_artifact_list(pj, rid, recover, token)
10751081
end
10761082

10771083

1084+
-- Fetches the resource monitor artifact for the task run described by `pj`,
-- wraps it with the task/worker identification fields and hands it to
-- inject_validated_msg as a "resource_monitor" message.
--
-- @param pj decoded task message; reads status.taskId, status.provisionerId,
--        status.taskGroupId, status.workerType, status.runs, runId,
--        workerGroup and workerId
-- @param recover optional recovery context, passed through unchanged to
--        get_artifact_string and json_decode_api
-- @return nothing; returns early without injecting when the artifact does not
--         decode or carries no `version` field
local function process_resource_monitor(pj, recover)
    local logtype = "resource_monitor"
    local tid = pj.status.taskId
    local rid = pj.runId
    local path = get_artifact_path(tid, rid, resource_monitor_path)

    local s = get_artifact_string(tid, path, recover, logtype)
    local j = json_decode_api(s, tid, recover, logtype)
    if not j or not j.version then return end

    local t = {
        taskId        = tid,
        provisionerId = pj.status.provisionerId,
        taskGroupId   = pj.status.taskGroupId,
        workerType    = pj.status.workerType,
        workerGroup   = pj.workerGroup,
        workerId      = pj.workerId,
        runId         = rid,
        -- runs is indexed with rid + 1 (runId appears to be zero based while
        -- the Lua array is one based)
        time          = pj.status.runs[rid + 1].started,
        payload       = j,
    }

    -- Pass the schema file name declared next to the schema itself. The
    -- previous argument was a misspelled, undefined identifier and therefore
    -- always evaluated to nil.
    inject_validated_msg(t, logtype, resource_monitor_schema, resource_monitor_schema_file, tid)
end
1106+
1107+
10781108
local function process_artifact_history(pj, recover)
10791109
local rid = pj.runId + 1
10801110
local j
@@ -1256,6 +1286,18 @@ local function get_log_file(pj, base_msg, recover)
12561286
end
12571287

12581288

1289+
-- Processes the resource monitor artifact when the artifact list contains it.
--
-- @param pj decoded task message, forwarded to process_resource_monitor
-- @param al artifact list table (with an `artifacts` array) or nil
-- @param recover optional recovery context, forwarded unchanged
local function get_resource_monitor(pj, al, recover)
    -- no artifact list, nothing to look for
    if not al then return end

    for _, artifact in ipairs(al.artifacts) do
        if artifact.name == resource_monitor_path then
            -- only the first matching entry is processed
            process_resource_monitor(pj, recover)
            return
        end
    end
end
1299+
1300+
12591301
function decode(data, dh, mutable)
12601302
local pj = cjson.decode(data)
12611303
inject_pulse_task(pj, dh) -- forward all task related pulse messages to BigQuery
@@ -1266,6 +1308,7 @@ function decode(data, dh, mutable)
12661308
or ex == "exchange/taskcluster-queue/v1/task-exception" then
12671309
local recover = { pj = pj }
12681310
recover.al = process_artifact_history(pj, recover)
1311+
get_resource_monitor(pj, recover.al, recover)
12691312
recover.td = get_task_definition(pj.status.taskId, recover)
12701313
if recover.td then
12711314
inject_task_definition(pj.status.taskId, recover.td)
@@ -1283,8 +1326,16 @@ end
12831326
-- Decodes a JSON `data` string holding `pj` and `runId`, fetches the artifact
-- list for that run and injects it. The resource monitor artifact is only
-- looked up when the requested run is the run the task message refers to.
--
-- @param data JSON encoded recovery record
-- @param dh unused here
-- @param mutable unused here
function decode_artifact_list_error(data, dh, mutable)
    local ctx = cjson.decode(data)
    local task = ctx.pj
    local run_id = ctx.runId

    local artifacts = get_artifact_list(task, run_id)
    inject_artifact_list(artifacts)

    if run_id ~= task.runId then return end
    get_resource_monitor(task, artifacts)
end
1333+
1334+
1335+
-- Decodes a JSON `data` string and runs process_resource_monitor on the `pj`
-- table it contains. No recovery context is passed along.
--
-- @param data JSON encoded recovery record containing `pj`
-- @param dh unused here
-- @param mutable unused here
function decode_resource_monitor_error(data, dh, mutable)
    local ctx = cjson.decode(data)
    process_resource_monitor(ctx.pj)
end
12891340

12901341

taskcluster/sandboxes/heka/input/taskcluster_usage_anomaly.lua

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,10 @@ local fn = string.format("/var/tmp/%s_query.json", read_config("Logger"))
4343
local qfn = string.format("/var/tmp/%s_query.sql", read_config("Logger"))
4444

4545
local sql = [[
46+
DECLARE
47+
start_date DATE DEFAULT DATE_SUB(CURRENT_DATE(), INTERVAL 8 day);
48+
DECLARE
49+
end_date DATE DEFAULT DATE_SUB(CURRENT_DATE(), INTERVAL 1 day);
4650
WITH
4751
hist AS (
4852
SELECT
Lines changed: 245 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,245 @@
1+
{
2+
"$schema": "http://json-schema.org/draft-04/schema#",
3+
"additionalProperties": true,
4+
"definitions": {
5+
"summary_struct": {
6+
"additionalProperties": true,
7+
"properties": {
8+
"maximum": {
9+
"type": "number"
10+
},
11+
"mean": {
12+
"type": "number"
13+
},
14+
"minimum": {
15+
"type": "number"
16+
}
17+
},
18+
"type": "object"
19+
},
20+
"summary_struct_int": {
21+
"additionalProperties": true,
22+
"properties": {
23+
"maximum": {
24+
"type": "integer"
25+
},
26+
"mean": {
27+
"type": "number"
28+
},
29+
"minimum": {
30+
"type": "integer"
31+
}
32+
},
33+
"type": "object"
34+
}
35+
},
36+
"description": "resource monitor",
37+
"properties": {
38+
"payload": {
39+
"additionalProperties": true,
40+
"properties": {
41+
"end": {
42+
"type": "number"
43+
},
44+
"samples": {
45+
"items": {
46+
"additionalProperties": true,
47+
"properties": {
48+
"available_memory": {
49+
"type": "integer"
50+
},
51+
"cpu": {
52+
"additionalProperties": true,
53+
"properties": {
54+
"idle": {
55+
"type": "number"
56+
},
57+
"iowait": {
58+
"type": "number"
59+
},
60+
"percent": {
61+
"type": "number"
62+
},
63+
"steal": {
64+
"type": "number"
65+
},
66+
"system": {
67+
"type": "number"
68+
},
69+
"user": {
70+
"type": "number"
71+
}
72+
},
73+
"type": "object"
74+
},
75+
"disk": {
76+
"additionalProperties": true,
77+
"properties": {
78+
"read_bytes": {
79+
"type": "integer"
80+
},
81+
"read_count": {
82+
"type": "integer"
83+
},
84+
"write_bytes": {
85+
"type": "integer"
86+
},
87+
"write_count": {
88+
"type": "integer"
89+
}
90+
},
91+
"type": "object"
92+
},
93+
"memory": {
94+
"additionalProperties": true,
95+
"properties": {
96+
"rss": {
97+
"type": "integer"
98+
},
99+
"swap": {
100+
"type": "integer"
101+
},
102+
"vms": {
103+
"type": "integer"
104+
}
105+
},
106+
"type": "object"
107+
},
108+
"network": {
109+
"additionalProperties": true,
110+
"properties": {
111+
"bytes_recv": {
112+
"type": "integer"
113+
},
114+
"bytes_sent": {
115+
"type": "integer"
116+
},
117+
"packets_recv": {
118+
"type": "integer"
119+
},
120+
"packets_sent": {
121+
"type": "integer"
122+
}
123+
},
124+
"type": "object"
125+
},
126+
"process_count": {
127+
"type": "integer"
128+
},
129+
"system_memory_used_percent": {
130+
"type": "number"
131+
},
132+
"thread_cound": {
133+
"type": "integer"
134+
},
135+
"timestamp": {
136+
"type": "integer"
137+
}
138+
},
139+
"type": "object"
140+
},
141+
"type": "array"
142+
},
143+
"start": {
144+
"type": "integer"
145+
},
146+
"summary": {
147+
"additionalProperties": true,
148+
"properties": {
149+
"available_memory": {
150+
"$ref": "#/definitions/summary_struct_int"
151+
},
152+
"cpu_iowait": {
153+
"$ref": "#/definitions/summary_struct"
154+
},
155+
"cpu_percent": {
156+
"$ref": "#/definitions/summary_struct"
157+
},
158+
"cpu_system": {
159+
"$ref": "#/definitions/summary_struct"
160+
},
161+
"cpu_user": {
162+
"$ref": "#/definitions/summary_struct"
163+
},
164+
"memory_percent": {
165+
"$ref": "#/definitions/summary_struct"
166+
},
167+
"rss": {
168+
"$ref": "#/definitions/summary_struct_int"
169+
}
170+
},
171+
"type": "object"
172+
},
173+
"system_info": {
174+
"additionalProperties": true,
175+
"properties": {
176+
"cpu_logical_count": {
177+
"type": "integer"
178+
},
179+
"cpu_physical_count": {
180+
"type": "integer"
181+
},
182+
"memory_stats": {
183+
"additionalProperties": true,
184+
"properties": {
185+
"swap_total": {
186+
"type": "integer"
187+
},
188+
"vmem_total": {
189+
"type": "integer"
190+
}
191+
},
192+
"type": "object"
193+
}
194+
},
195+
"type": "object"
196+
},
197+
"version": {
198+
"type": "integer"
199+
}
200+
},
201+
"required": [
202+
"end",
203+
"samples",
204+
"start",
205+
"summary",
206+
"system_info",
207+
"version"
208+
],
209+
"type": "object"
210+
},
211+
"provisionerId": {
212+
"type": "string"
213+
},
214+
"runId": {
215+
"type": "integer"
216+
},
217+
"taskGroupId": {
218+
"type": "string"
219+
},
220+
"taskId": {
221+
"type": "string"
222+
},
223+
"time": {
224+
"type": "string"
225+
},
226+
"workerGroup": {
227+
"type": "string"
228+
},
229+
"workerId": {
230+
"type": "string"
231+
},
232+
"workerType": {
233+
"type": "string"
234+
}
235+
},
236+
"required": [
237+
"time",
238+
"taskId",
239+
"runId",
240+
"taskGroupId",
241+
"payload"
242+
],
243+
"title": "resource monitor",
244+
"type": "object"
245+
}
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
filename = "verify_resource_monitor.lua"
2+
message_matcher = "Type == 'resource_monitor'"
3+
ticker_interval = 0
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
-- This Source Code Form is subject to the terms of the Mozilla Public
2+
-- License, v. 2.0. If a copy of the MPL was not distributed with this
3+
-- file, You can obtain one at http://mozilla.org/MPL/2.0/.
4+
5+
require "string"
6+
require "cjson"
7+
8+
local cnt = 0
9+
10+
local expected = {231097}
11+
12+
-- Checks the payload size of each received message against the `expected`
-- list, in arrival order.
--
-- Raises an error when more messages arrive than `expected` has entries, or
-- when the payload length differs from the expected byte count.
-- @return 0 on success
function process_message()
    cnt = cnt + 1

    local e = expected[cnt]
    if e == nil then
        -- without this guard the "%d" below would fail on a nil argument and
        -- hide the real problem (too many messages)
        error(string.format("received %d expected: %d", cnt, #expected))
    end

    local s = read_message("Payload")
    -- a missing payload is reported as a zero byte mismatch instead of
    -- failing on the length operator
    local r = s and #s or 0
    if r ~= e then error(string.format("bytes received %d expected: %d", r, e)) end
    return 0
end
20+
21+
-- Raises an error if the number of messages seen so far differs from the
-- number of entries in `expected`.
--
-- @param ns timer argument, unused here
function timer_event(ns)
    local want = #expected
    if cnt ~= want then
        error(string.format("received %d expected: %d", cnt, want))
    end
end

taskcluster/tests/integration/taskcluster/task_USQ8K5YcQJKQgybb28cyXg_runs_0_artifacts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,12 @@
2323
"name": "public/test_info/perfherder-data-memory.json",
2424
"expires": "2020-07-29T20:55:54.564Z",
2525
"contentType": "application/json"
26+
},
27+
{
28+
"storageType": "s3",
29+
"name": "public/monitoring/resource-monitor.json",
30+
"expires": "2020-07-29T20:55:54.564Z",
31+
"contentType": "application/json"
2632
}
2733
]
2834
}

0 commit comments

Comments
 (0)