Skip to content

Commit

Permalink
Improve file detection with signature check capabilities
Browse files Browse the repository at this point in the history
This allows more complex detection based on regex rules that are checked against
a certain number of lines.
  • Loading branch information
JoeKar committed Sep 8, 2023
1 parent fbce241 commit f1bcee8
Show file tree
Hide file tree
Showing 38 changed files with 131 additions and 63 deletions.
65 changes: 58 additions & 7 deletions internal/buffer/buffer.go
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,13 @@ type SharedBuffer struct {
origHash [md5.Size]byte
}

// SyntaxFileBuffer is a helper structure to store properties of one single syntax file.
// UpdateRules collects one entry per syntax file whose filename rule matched, so that
// the final choice between several candidates can be made afterwards.
// NOTE: the fields are filled with positional composite literals, so their order
// must not be changed.
type SyntaxFileBuffer struct {
// Header is the parsed header of the syntax file (file type and detection rules).
Header *highlight.Header
// FileName is the name of the runtime file the header was read from.
FileName string
// SyntaxDef is the parsed syntax definition; it is nil when only the header
// has been read so far.
SyntaxDef *highlight.Def
}

func (b *SharedBuffer) insert(pos Loc, value []byte) {
b.isModified = true
b.HasSuggestions = false
Expand Down Expand Up @@ -685,6 +692,8 @@ func (b *Buffer) UpdateRules() {
if ft == "off" {
return
}

syntaxFiles := []SyntaxFileBuffer{}
syntaxFile := ""
foundDef := false
var header *highlight.Header
Expand All @@ -706,16 +715,21 @@ func (b *Buffer) UpdateRules() {
continue
}

if ((ft == "unknown" || ft == "") && highlight.MatchFiletype(header.FtDetect, b.Path, b.lines[0].data)) || header.FileType == ft {
if ((ft == "unknown" || ft == "") && highlight.MatchFileName(header, b.Path)) || header.FileType == ft {
syndef, err := highlight.ParseDef(file, header)
if err != nil {
screen.TermMessage("Error parsing syntax file " + f.Name() + ": " + err.Error())
continue
}
b.SyntaxDef = syndef
syntaxFile = f.Name()
foundDef = true
break

if header.FileType == ft {
b.SyntaxDef = syndef
syntaxFile = f.Name()
break
} else {
syntaxFiles = append(syntaxFiles, SyntaxFileBuffer{header, f.Name(), syndef})
}
}
}

Expand All @@ -734,16 +748,53 @@ func (b *Buffer) UpdateRules() {
}

if ft == "unknown" || ft == "" {
if highlight.MatchFiletype(header.FtDetect, b.Path, b.lines[0].data) {
syntaxFile = f.Name()
break
if highlight.MatchFileName(header, b.Path) {
syntaxFiles = append(syntaxFiles, SyntaxFileBuffer{header, f.Name(), nil})
}
} else if header.FileType == ft {
syntaxFile = f.Name()
break
}
}

if syntaxFile == "" {
length := len(syntaxFiles)
if length > 0 {
signatureMatch := false
if length > 1 {
detectlimit := util.IntOpt(b.Settings["detectlimit"])
lineCount := len(b.lines)
limit := 0
if detectlimit > 0 {
if lineCount < detectlimit {
limit = lineCount
} else {
limit = detectlimit
}
} else {
limit = lineCount
}
for i := 0; i < length && !signatureMatch; i++ {
if highlight.HasFileSignature(syntaxFiles[i].Header) {
for j := 0; j < limit && !signatureMatch; j++ {
if highlight.MatchFileSignature(syntaxFiles[i].Header, b.lines[j].data) {
syntaxFile = syntaxFiles[i].FileName
b.SyntaxDef = syntaxFiles[i].SyntaxDef
header = syntaxFiles[i].Header
signatureMatch = true
}
}
}
}
}
if length == 1 || !signatureMatch {
syntaxFile = syntaxFiles[0].FileName
b.SyntaxDef = syntaxFiles[0].SyntaxDef
header = syntaxFiles[0].Header
}
}
}

if syntaxFile != "" && !foundDef {
// we found a syntax file using a syntax header file
for _, f := range config.ListRuntimeFiles(config.RTSyntax) {
Expand Down
36 changes: 26 additions & 10 deletions pkg/highlight/ftdetect.go
Original file line number Diff line number Diff line change
@@ -1,17 +1,33 @@
package highlight

import "regexp"

// MatchFiletype will use the list of syntax definitions provided and the filename and first line of the file
// to determine the filetype of the file
// It will return the corresponding syntax definition for the filetype
func MatchFiletype(ftdetect [2]*regexp.Regexp, filename string, firstLine []byte) bool {
if ftdetect[0] != nil && ftdetect[0].MatchString(filename) {
return true
// MatchFileName reports whether the filename rule of the given syntax header
// matches filename. It returns false if header is nil or has no filename rule.
func MatchFileName(header *Header, filename string) bool {
if header != nil {
// index 0 refers to the filename respective filetype rule
if header.FtDetect[0] != nil {
return header.FtDetect[0].MatchString(filename)
}
}

if ftdetect[1] != nil {
return ftdetect[1].Match(firstLine)
return false
}

// HasFileSignature reports whether the given syntax header defines a
// signature rule. A nil header never has a signature.
func HasFileSignature(header *Header) bool {
	if header == nil {
		return false
	}
	// index 1 refers to the signature rule
	signature := header.FtDetect[1]
	return signature != nil
}

// MatchFileSignature reports whether the signature rule of the given syntax header
// matches line. It returns false if header is nil or has no signature rule.
func MatchFileSignature(header *Header, line []byte) bool {
if header != nil {
// index 1 refers to the signature rule
if header.FtDetect[1] != nil {
return header.FtDetect[1].Match(line)
}
}

return false
Expand Down
12 changes: 6 additions & 6 deletions pkg/highlight/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ type HeaderYaml struct {
FileType string `yaml:"filetype"`
Detect struct {
FNameRgx string `yaml:"filename"`
HeaderRgx string `yaml:"header"`
SignatureRgx string `yaml:"signature"`
} `yaml:"detect"`
}

Expand Down Expand Up @@ -104,13 +104,13 @@ func MakeHeader(data []byte) (*Header, error) {
var err error
header.FileType = string(lines[0])
fnameRgx := string(lines[1])
headerRgx := string(lines[2])
signatureRgx := string(lines[2])

if fnameRgx != "" {
header.FtDetect[0], err = regexp.Compile(fnameRgx)
}
if headerRgx != "" {
header.FtDetect[1], err = regexp.Compile(headerRgx)
if signatureRgx != "" {
header.FtDetect[1], err = regexp.Compile(signatureRgx)
}

if err != nil {
Expand All @@ -135,8 +135,8 @@ func MakeHeaderYaml(data []byte) (*Header, error) {
if hdrYaml.Detect.FNameRgx != "" {
header.FtDetect[0], err = regexp.Compile(hdrYaml.Detect.FNameRgx)
}
if hdrYaml.Detect.HeaderRgx != "" {
header.FtDetect[1], err = regexp.Compile(hdrYaml.Detect.HeaderRgx)
if hdrYaml.Detect.SignatureRgx != "" {
header.FtDetect[1], err = regexp.Compile(hdrYaml.Detect.SignatureRgx)
}

if err != nil {
Expand Down
6 changes: 3 additions & 3 deletions runtime/help/colors.md
Original file line number Diff line number Diff line change
Expand Up @@ -267,13 +267,13 @@ detect:
```

Micro will match this regex against a given filename to detect the filetype.
You may also provide an optional `header` regex that will check the first line
of the file. For example:
You may also provide an optional `signature` regex that will check a certain
number of lines of a file to find specific marks. For example:

```
detect:
filename: "\\.ya?ml$"
header: "%YAML"
signature: "%YAML"
```

### Syntax rules
Expand Down
2 changes: 1 addition & 1 deletion runtime/syntax/PowerShell.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ filetype: powershell

detect:
filename: "\\.ps(1|m1|d1)$"
#header: ""
#signature: ""

rules:
# - comment.block: # Block Comment
Expand Down
2 changes: 1 addition & 1 deletion runtime/syntax/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

Here are micro's syntax files.

Each yaml file specifies how to detect the filetype based on file extension or headers (first line of the file).
Each yaml file specifies how to detect the filetype based on file extension or a given signature. The signature can be matched against a maximum of 100 lines (to limit parse times) for a best "guess".
Then there are patterns and regions linked to highlight groups which tell micro how to highlight that filetype.

Making your own syntax files is very simple. I recommend you check the file after you are finished with the
Expand Down
2 changes: 1 addition & 1 deletion runtime/syntax/awk.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ filetype: awk

detect:
filename: "\\.awk$"
header: "^#!.*bin/(env +)?awk( |$)"
signature: "^#!.*bin/(env +)?awk( |$)"

rules:
- preproc: "\\$[A-Za-z0-9_!@#$*?\\-]+"
Expand Down
2 changes: 1 addition & 1 deletion runtime/syntax/bat.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ filetype: batch

detect:
filename: "(\\.bat$)"
# header: ""
# signature: ""

rules:
# Numbers
Expand Down
2 changes: 1 addition & 1 deletion runtime/syntax/crontab.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ filetype: crontab

detect:
filename: "crontab$"
header: "^#.*?/etc/crontab"
signature: "^#.*?/etc/crontab"

rules:
# The time and date fields are:
Expand Down
2 changes: 1 addition & 1 deletion runtime/syntax/csx.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
filetype: csharp-script
detect:
filename: "\\.csx$"
header: "^#!.*/(env +)?dotnet-script( |$)"
signature: "^#!.*/(env +)?dotnet-script( |$)"

rules:
- include: "csharp"
Expand Down
2 changes: 1 addition & 1 deletion runtime/syntax/fish.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ filetype: fish

detect:
filename: "\\.fish$"
header: "^#!.*/(env +)?fish( |$)"
signature: "^#!.*/(env +)?fish( |$)"

rules:
# Numbers
Expand Down
2 changes: 1 addition & 1 deletion runtime/syntax/godoc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ filetype: godoc

detect:
filename: "\\.godoc$"
header: package.*import
signature: package.*import

rules:
- preproc: "^[^ ].*"
Expand Down
2 changes: 1 addition & 1 deletion runtime/syntax/groovy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ filetype: groovy

detect:
filename: "(\\.(groovy|gy|gvy|gsh|gradle)$|^[Jj]enkinsfile$)"
header: "^#!.*/(env +)?groovy *$"
signature: "^#!.*/(env +)?groovy *$"

rules:
# And the style guide for constants is CONSTANT_CASE
Expand Down
2 changes: 1 addition & 1 deletion runtime/syntax/html4.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ filetype: html4

detect:
filename: "\\.htm[l]?4$"
header: "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN|http://www.w3.org/TR/html4/strict.dtd\">"
signature: "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN|http://www.w3.org/TR/html4/strict.dtd\">"

rules:
- error: "<[^!].*?>"
Expand Down
2 changes: 1 addition & 1 deletion runtime/syntax/html5.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ filetype: html5

detect:
filename: "\\.htm[l]?5$"
header: "<!DOCTYPE html5>"
signature: "<!DOCTYPE html5>"

rules:
- error: "<[^!].*?>"
Expand Down
2 changes: 1 addition & 1 deletion runtime/syntax/javascript.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ filetype: javascript

detect:
filename: "(\\.js$|\\.es[5678]?$|\\.mjs$)"
header: "^#!.*/(env +)?node( |$)"
signature: "^#!.*/(env +)?node( |$)"

rules:
- constant.number: "\\b[-+]?([1-9][0-9]*|0[0-7]*|0x[0-9a-fA-F]+)([uU][lL]?|[lL][uU]?)?\\b"
Expand Down
2 changes: 1 addition & 1 deletion runtime/syntax/json.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ filetype: json

detect:
filename: "\\.json$"
header: "^\\{$"
signature: "^\\{$"

rules:
- constant.number: "\\b[-+]?([1-9][0-9]*|0[0-7]*|0x[0-9a-fA-F]+)([uU][lL]?|[lL][uU]?)?\\b"
Expand Down
2 changes: 1 addition & 1 deletion runtime/syntax/julia.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ filetype: julia

detect:
filename: "\\.jl$"
header: "^#!.*/(env +)?julia( |$)"
signature: "^#!.*/(env +)?julia( |$)"

rules:

Expand Down
2 changes: 1 addition & 1 deletion runtime/syntax/justfile.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ filetype: 'justfile'

detect:
filename: "(^\\.?[Jj]ustfile|\\.just)$"
header: "^#!.*/(env +)?[bg]?just --justfile"
signature: "^#!.*/(env +)?[bg]?just --justfile"

rules:
- preproc: "\\<(ifeq|ifdef|ifneq|ifndef|else|endif)\\>"
Expand Down
2 changes: 1 addition & 1 deletion runtime/syntax/mail.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ filetype: mail

detect:
filename: "(.*/mutt-.*|\\.eml)$"
header: "^From .* \\d+:\\d+:\\d+ \\d+"
signature: "^From .* \\d+:\\d+:\\d+ \\d+"

rules:
- type: "^From .*"
Expand Down
10 changes: 5 additions & 5 deletions runtime/syntax/make_headers.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,15 @@ import (
type HeaderYaml struct {
FileType string `yaml:"filetype"`
Detect struct {
FNameRgx string `yaml:"filename"`
HeaderRgx string `yaml:"header"`
FNameRgx string `yaml:"filename"`
SignatureRgx string `yaml:"signature"`
} `yaml:"detect"`
}

type Header struct {
FileType string
FNameRgx string
HeaderRgx string
SignatureRgx string
}

func main() {
Expand Down Expand Up @@ -58,7 +58,7 @@ func encode(name string, c HeaderYaml) {
f, _ := os.Create(name + ".hdr")
f.WriteString(c.FileType + "\n")
f.WriteString(c.Detect.FNameRgx + "\n")
f.WriteString(c.Detect.HeaderRgx + "\n")
f.WriteString(c.Detect.SignatureRgx + "\n")
f.Close()
}

Expand All @@ -69,7 +69,7 @@ func decode(name string) Header {
var hdr Header
hdr.FileType = string(strs[0])
hdr.FNameRgx = string(strs[1])
hdr.HeaderRgx = string(strs[2])
hdr.SignatureRgx = string(strs[2])
fmt.Printf("took %v\n", time.Since(start))

return hdr
Expand Down
2 changes: 1 addition & 1 deletion runtime/syntax/makefile.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ filetype: makefile

detect:
filename: "([Mm]akefile|\\.ma?k)$"
header: "^#!.*/(env +)?[bg]?make( |$)"
signature: "^#!.*/(env +)?[bg]?make( |$)"

rules:
- preproc: "\\<(ifeq|ifdef|ifneq|ifndef|else|endif)\\>"
Expand Down
Loading

0 comments on commit f1bcee8

Please sign in to comment.