Skip to content

Commit

Permalink
More stats
Browse files Browse the repository at this point in the history
Signed-off-by: Gavin Halliday <[email protected]>
  • Loading branch information
ghalliday committed Oct 29, 2024
1 parent 1c4e2c8 commit 2477299
Showing 1 changed file with 40 additions and 13 deletions.
53 changes: 40 additions & 13 deletions tools/roxie/extract-roxie-timings.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ def calculateDerivedStats(curRow):
timeAgentProcess = float(curRow.get("TimeAgentProcess", 0.0))
timeSoapcall = float(curRow.get("TimeSoapcall", 0.0))

agentRequestEstimate = numLeafHits + numLeafAdds # This could be completely wrong, but better than nothing
numAgentRequests = float(curRow.get("NumAgentRequests", agentRequestEstimate)) # 9.8.x only

timeLocalCpu = timeLocalExecute - timeAgentWait - timeSoapcall
timeRemoteCpu = timeAgentProcess - timeLeafRead - timeBranchRead
workerCpuLoad = timeRemoteCpu / timeAgentProcess if timeAgentProcess else 0
Expand Down Expand Up @@ -80,6 +83,9 @@ def calculateDerivedStats(curRow):
curRow["TimeLocalCpu"] = timeLocalCpu
curRow["TimeRemoteCpu"] = timeRemoteCpu

if numAgentRequests:
curRow["avgTimeAgentProcess"] = timeAgentProcess / numAgentRequests

def calculateSummaryStats(curRow, numCpus, numRows):

curRow["summary"] = "summary"
Expand Down Expand Up @@ -125,6 +131,7 @@ def printRow(curRow):

minTimeStamp = ''
maxTimeStamp = ''
minElapsed = 0

completePattern = re.compile("COMPLETE: ([^ ]*)")
elapsedPattern = re.compile("complete in ([0-9]*) ms")
Expand Down Expand Up @@ -154,6 +161,10 @@ def printRow(curRow):
line_count = 0
for row in csv_reader:
numCols = len(row);
#Protect against output which comes directly from log analytics.
if numCols == 0:
continue

rowText = row[numCols-1]

completeMatch = completePattern.search(rowText)
Expand All @@ -175,8 +186,8 @@ def printRow(curRow):
curRow["_id_"] = mapping[0].replace(",","!");

elapsedMatch = elapsedPattern.search(rowText)
if elapsedMatch:
curRow["elapsed"] = int(elapsedMatch.group(1))
elapsed = int(elapsedMatch.group(1)) if elapsedMatch else 0
curRow["elapsed"] = elapsed

#MORE: Unimplemented - allow timestamp filtering
timestamp = ''
Expand All @@ -188,6 +199,8 @@ def printRow(curRow):

if minTimeStamp == '' or timestamp < minTimeStamp:
minTimeStamp = timestamp
minElapsed = elapsed

if maxTimeStamp == '' or timestamp > maxTimeStamp:
maxTimeStamp = timestamp

Expand Down Expand Up @@ -275,6 +288,7 @@ def printRow(curRow):
allStats["WorkerCpuLoad"] = 1
allStats["TimeLocalCpu"] = 1
allStats["TimeRemoteCpu"] = 1
allStats["avgTimeAgentProcess"] = 1 # 9.8 only
allStats[' '] = 1
allStats['cpus='+str(cpus)] = 1
allStats["MaxTransactionsPerSecond"] = 1
Expand All @@ -286,7 +300,7 @@ def printRow(curRow):
try:
minTime = datetime.datetime.strptime(minTimeStamp, '%Y-%m-%d %H:%M:%S.%f')
maxTime = datetime.datetime.strptime(maxTimeStamp, '%Y-%m-%d %H:%M:%S.%f')
elapsed = (maxTime - minTime).seconds
elapsed = (maxTime - minTime).seconds + minElapsed/1000
print(f"Time range: ['{minTimeStamp}'..'{maxTimeStamp}'] = {elapsed}s")

except:
Expand All @@ -297,8 +311,11 @@ def printRow(curRow):
for statName in allStats:
headings = headings + ',' + statName

globalTotalRow = dict(_id_="summary")
numGlobalRows = 0
for service in allServices:
allRows = allServices[service]
numGlobalRows += len(allRows)

# Calculate some derived statistics.
for curRow in allRows:
Expand Down Expand Up @@ -331,6 +348,8 @@ def printRow(curRow):
totalRow[name] += value
else:
totalRow[name] = value
globalTotalRow[name] = globalTotalRow.get(name, 0) + value

calculateDerivedStats(totalRow)
calculateSummaryStats(totalRow, cpus, len(allRows))

Expand All @@ -340,7 +359,7 @@ def printRow(curRow):
for statName in allStats:
if statName in totalRow:
avgRow[statName] = float(totalRow[statName]) / numRows
calculateDerivedStats(totalRow)
calculateDerivedStats(avgRow)

# Now calculate the field values for each of the centiles that are requested
centileRows = dict()
Expand Down Expand Up @@ -369,12 +388,20 @@ def sortFunc(cur):
printRow(centileRows[centile])

print()
#These stats are only really revelant if it is including all the transactions from all services
#they may need rethinking a little.
if elapsed and numRows > 1:
perCpuTransactionsPerSecond = totalRow["perCpuTransactionsPerSecond"]
#elapsed is time from end of 1st transaction to end of last transaction - so subtract 1 from number of rows
actualTransationsPerSecond = (numRows - 1) / elapsed
expectedCpuLoad = actualTransationsPerSecond / perCpuTransactionsPerSecond if perCpuTransactionsPerSecond else 0
print(f"Transactions: Throughput={actualTransationsPerSecond}/s Time={1/actualTransationsPerSecond}s ExpectedCpuLoad={expectedCpuLoad}")
print()

#These stats are only really revelant if it is including all the transactions from all services
if reportSummary and elapsed and numGlobalRows:
calculateDerivedStats(globalTotalRow)
calculateSummaryStats(globalTotalRow, cpus, numGlobalRows)

perCpuTransactionsPerSecond = globalTotalRow["perCpuTransactionsPerSecond"]
totalDiskReads = (globalTotalRow.get("NumNodeDiskFetches", 0) + globalTotalRow.get("NumLeafDiskFetches", 0))
actualTransationsPerSecond = numGlobalRows / elapsed
expectedCpuLoad = actualTransationsPerSecond / perCpuTransactionsPerSecond if perCpuTransactionsPerSecond else 0
iops = totalDiskReads / elapsed
throughput = iops * 8192
printRow(globalTotalRow)
print()
print(f"Transactions {numGlobalRows}q {elapsed}s: Throughput={actualTransationsPerSecond:.3f}q/s Time={1/actualTransationsPerSecond:.3f}s/q ExpectedCpuLoad={expectedCpuLoad:.3f} iops={iops:.3f} throughput={throughput/1000000:.3f}MB/s")
print("All times in ms unless explicitly stated")
print()

0 comments on commit 2477299

Please sign in to comment.