Skip to content

Commit

Permalink
[LOI-341] Add Neuron error logs parser (#19082)
Browse files Browse the repository at this point in the history
* Add Neuron error logs parser

* Switch number to integer in parser

* Add test results

* Fix source
  • Loading branch information
dkirov-dd authored Dec 19, 2024
1 parent ad8e988 commit 35c6db3
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 8 deletions.
37 changes: 29 additions & 8 deletions aws_neuron/assets/logs/aws_neuron.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,29 +7,45 @@ pipeline:
name: AWS Neuron
enabled: true
filter:
query: 'source:aws_neuron'
query: "source:aws_neuron"
processors:
- type: grok-parser
name: Operator compilation
enabled: true
source: message
samples:
- 'INFO:Neuron: => aten::Int: 96'
- 'INFO:Neuron: => aten::Int: 1 [supported]'
- 'INFO:Neuron: => aten::embedding: 3 [not supported]'
- "INFO:Neuron: => aten::Int: 96"
- "INFO:Neuron: => aten::Int: 1 [supported]"
- "INFO:Neuron: => aten::embedding: 3 [not supported]"
grok:
supportRules: operator %{word:operator.library}::%{word:operator.type}
matchRules: 'operator_rule INFO:Neuron: => %{operator}: %{integer:count}( \[%{data:not_compiled_msg}\])?'
matchRules: "operator_rule INFO:Neuron: => %{operator}: %{integer:count}(
\\[%{data:not_compiled_msg}\\])?"
- type: grok-parser
name: Message separation
enabled: true
source: message
samples:
- INFO:Neuron:Number of arithmetic operators (pre-compilation) before = 565, fused = 548, percent fused = 96.99%
- 'INFO:Neuron: => aten::layer_norm: 25'
- INFO:Neuron:Number of arithmetic operators (pre-compilation) before =
565, fused = 548, percent fused = 96.99%
- "INFO:Neuron: => aten::layer_norm: 25"
- >
2024-11-15 10:38:24.000103: 4938 ERROR ||NEURON_CC_WRAPPER||:
Compilation failed for
/tmp/ubuntu/neuroncc_compile_workdir/cf6cf570-d889-4a0c-a821-719e225d9bc8/model.MODULE_16150394314145281873+d7517139.hlo_module.pb
after 0 retries.
- >
2024-Nov-15
13:35:03.0879 6475:6475 ERROR NRT:nrt_allocate_neuron_cores NeuronCore(s)
not available - Requested:16 Available:4
grok:
supportRules: ""
matchRules: message_rule %{word:level}:Neuron:( => )?%{data:msg}
matchRules: >-
info_rule %{word:level}:Neuron:( => )?%{data:msg}
error_rule_1 %{date("yyyy-MM-dd' 'HH:mm:ss.SSSSSS"):date}: %{integer:pid} %{word:level} %{data:msg}
error_rule_2 %{date("yyyy-MMM-dd' 'HH:mm:ss.SSSS"):date} %{integer:pid}:%{integer:tid} %{word:level} %{data:msg}
- type: message-remapper
name: Define `msg` as the official message of the log
enabled: true
Expand All @@ -40,3 +56,8 @@ pipeline:
enabled: true
sources:
- level
- type: status-remapper
name: Define `level` as the official status of the log
enabled: true
sources:
- level
24 changes: 24 additions & 0 deletions aws_neuron/assets/logs/aws_neuron_tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,27 @@ tests:
status: "info"
tags:
- "source:LOGS_SOURCE"
-
sample: "2024-11-15 10:38:24.000103: 4938 ERROR ||NEURON_CC_WRAPPER||: Compilation failed for /tmp/ubuntu/neuroncc_compile_workdir/cf6cf570-d889-4a0c-a821-719e225d9bc8/model.MODULE_16150394314145281873+d7517139.hlo_module.pb after 0 retries."
result:
custom:
date: 1731667104000
level: "ERROR"
pid: 4938
message: "||NEURON_CC_WRAPPER||: Compilation failed for /tmp/ubuntu/neuroncc_compile_workdir/cf6cf570-d889-4a0c-a821-719e225d9bc8/model.MODULE_16150394314145281873+d7517139.hlo_module.pb after 0 retries."
status: "error"
tags:
- "source:LOGS_SOURCE"
-
sample: "2024-Nov-15 13:35:03.0879 6475:6475 ERROR NRT:nrt_allocate_neuron_cores NeuronCore(s) not available - Requested:16 Available:4"
result:
custom:
date: 1731677703087
level: "ERROR"
pid: 6475
tid: 6475
message: " NRT:nrt_allocate_neuron_cores NeuronCore(s) not available - Requested:16 Available:4"
status: "error"
tags:
- "source:LOGS_SOURCE"

0 comments on commit 35c6db3

Please sign in to comment.