Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve file detection with signature check capabilities #2819

Merged
merged 6 commits into from
Mar 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 69 additions & 21 deletions internal/buffer/buffer.go
Original file line number Diff line number Diff line change
Expand Up @@ -692,6 +692,16 @@ func (b *Buffer) UpdateRules() {
if ft == "off" {
return
}

// syntaxFileBuffer is a helper structure
// to store properties of one single syntax file
type syntaxFileBuffer struct {
header *highlight.Header
fileName string
syntaxDef *highlight.Def
}

syntaxFiles := []syntaxFileBuffer{}
syntaxFile := ""
foundDef := false
var header *highlight.Header
Expand All @@ -714,41 +724,79 @@ func (b *Buffer) UpdateRules() {
continue
}

if ((ft == "unknown" || ft == "") && highlight.MatchFiletype(header.FtDetect, b.Path, b.lines[0].data)) || header.FileType == ft {
if ((ft == "unknown" || ft == "") && header.MatchFileName(b.Path)) || header.FileType == ft {
syndef, err := highlight.ParseDef(file, header)
if err != nil {
screen.TermMessage("Error parsing syntax file " + f.Name() + ": " + err.Error())
continue
}
b.SyntaxDef = syndef
syntaxFile = f.Name()
foundDef = true
break

if header.FileType == ft {
b.SyntaxDef = syndef
syntaxFile = f.Name()
break
} else {
syntaxFiles = append(syntaxFiles, syntaxFileBuffer{header, f.Name(), syndef})
}
}
}

// search in the default syntax files
for _, f := range config.ListRuntimeFiles(config.RTSyntaxHeader) {
data, err := f.Data()
if err != nil {
screen.TermMessage("Error loading syntax header file " + f.Name() + ": " + err.Error())
continue
}
if !foundDef {
JoeKar marked this conversation as resolved.
Show resolved Hide resolved
// search in the default syntax files
for _, f := range config.ListRuntimeFiles(config.RTSyntaxHeader) {
data, err := f.Data()
if err != nil {
screen.TermMessage("Error loading syntax header file " + f.Name() + ": " + err.Error())
continue
}

header, err = highlight.MakeHeader(data)
if err != nil {
screen.TermMessage("Error reading syntax header file", f.Name(), err)
continue
}
header, err = highlight.MakeHeader(data)
if err != nil {
screen.TermMessage("Error reading syntax header file", f.Name(), err)
continue
}

if ft == "unknown" || ft == "" {
if highlight.MatchFiletype(header.FtDetect, b.Path, b.lines[0].data) {
if ft == "unknown" || ft == "" {
if header.MatchFileName(b.Path) {
syntaxFiles = append(syntaxFiles, syntaxFileBuffer{header, f.Name(), nil})
}
} else if header.FileType == ft {
syntaxFile = f.Name()
break
}
} else if header.FileType == ft {
syntaxFile = f.Name()
break
}
}

if syntaxFile == "" {
length := len(syntaxFiles)
if length > 0 {
signatureMatch := false
if length > 1 {
detectlimit := util.IntOpt(b.Settings["detectlimit"])
lineCount := len(b.lines)
limit := lineCount
if detectlimit > 0 && lineCount > detectlimit {
limit = detectlimit
}
JoeKar marked this conversation as resolved.
Show resolved Hide resolved
JoeKar marked this conversation as resolved.
Show resolved Hide resolved
for i := 0; i < length && !signatureMatch; i++ {
if syntaxFiles[i].header.HasFileSignature() {
for j := 0; j < limit && !signatureMatch; j++ {
if syntaxFiles[i].header.MatchFileSignature(b.lines[j].data) {
syntaxFile = syntaxFiles[i].fileName
b.SyntaxDef = syntaxFiles[i].syntaxDef
header = syntaxFiles[i].header
signatureMatch = true
}
}
}
}
}
if length == 1 || !signatureMatch {
syntaxFile = syntaxFiles[0].fileName
b.SyntaxDef = syntaxFiles[0].syntaxDef
header = syntaxFiles[0].header
}
}
}

Expand Down
2 changes: 2 additions & 0 deletions internal/config/settings.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ func init() {
var optionValidators = map[string]optionValidator{
"autosave": validateNonNegativeValue,
"clipboard": validateClipboard,
"detectlimit": validateNonNegativeValue,
"tabsize": validatePositiveValue,
"scrollmargin": validateNonNegativeValue,
"scrollspeed": validateNonNegativeValue,
Expand Down Expand Up @@ -282,6 +283,7 @@ var defaultCommonSettings = map[string]interface{}{
"basename": false,
"colorcolumn": float64(0),
"cursorline": true,
"detectlimit": float64(100),
"diffgutter": false,
"encoding": "utf-8",
"eofnewline": true,
Expand Down
18 changes: 0 additions & 18 deletions pkg/highlight/ftdetect.go

This file was deleted.

56 changes: 39 additions & 17 deletions pkg/highlight/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,27 +33,26 @@ func (g Group) String() string {
// Then it has the rules which define how to highlight the file
type Def struct {
// Header is embedded, so its fields (such as FileType) are promoted onto Def.
*Header

// rules holds the rules which define how to highlight the file.
rules *rules
}

type Header struct {
FileType string
FtDetect [2]*regexp.Regexp
FileType string
FileNameRegex *regexp.Regexp
SignatureRegex *regexp.Regexp
}

type HeaderYaml struct {
FileType string `yaml:"filetype"`
Detect struct {
FNameRgx string `yaml:"filename"`
HeaderRgx string `yaml:"header"`
FNameRegexStr string `yaml:"filename"`
SignatureRegexStr string `yaml:"signature"`
} `yaml:"detect"`
}

type File struct {
FileType string

yamlSrc map[interface{}]interface{}
yamlSrc map[interface{}]interface{}
}

// A Pattern is one simple syntax rule
Expand Down Expand Up @@ -103,14 +102,14 @@ func MakeHeader(data []byte) (*Header, error) {
header := new(Header)
var err error
header.FileType = string(lines[0])
fnameRgx := string(lines[1])
headerRgx := string(lines[2])
fnameRegexStr := string(lines[1])
signatureRegexStr := string(lines[2])

if fnameRgx != "" {
header.FtDetect[0], err = regexp.Compile(fnameRgx)
if fnameRegexStr != "" {
header.FileNameRegex, err = regexp.Compile(fnameRegexStr)
}
if err == nil && headerRgx != "" {
header.FtDetect[1], err = regexp.Compile(headerRgx)
if err == nil && signatureRegexStr != "" {
header.SignatureRegex, err = regexp.Compile(signatureRegexStr)
}

if err != nil {
Expand All @@ -132,11 +131,11 @@ func MakeHeaderYaml(data []byte) (*Header, error) {
header := new(Header)
header.FileType = hdrYaml.FileType

if hdrYaml.Detect.FNameRgx != "" {
header.FtDetect[0], err = regexp.Compile(hdrYaml.Detect.FNameRgx)
if hdrYaml.Detect.FNameRegexStr != "" {
header.FileNameRegex, err = regexp.Compile(hdrYaml.Detect.FNameRegexStr)
}
if err == nil && hdrYaml.Detect.HeaderRgx != "" {
header.FtDetect[1], err = regexp.Compile(hdrYaml.Detect.HeaderRgx)
if err == nil && hdrYaml.Detect.SignatureRegexStr != "" {
header.SignatureRegex, err = regexp.Compile(hdrYaml.Detect.SignatureRegexStr)
}

if err != nil {
Expand All @@ -146,6 +145,29 @@ func MakeHeaderYaml(data []byte) (*Header, error) {
return header, nil
}

// MatchFileName reports whether filename matches the header's file name regex.
// A header that has no file name regex never matches.
func (h *Header) MatchFileName(filename string) bool {
	re := h.FileNameRegex
	return re != nil && re.MatchString(filename)
}

// HasFileSignature reports whether the header carries a signature regex.
func (h *Header) HasFileSignature() bool {
	return h.SignatureRegex != nil
}

// MatchFileSignature reports whether line matches the header's signature regex.
// A header that has no signature regex never matches.
func (h *Header) MatchFileSignature(line []byte) bool {
	re := h.SignatureRegex
	if re == nil {
		return false
	}
	return re.Match(line)
}

func ParseFile(input []byte) (f *File, err error) {
// This is just so if we have an error, we can exit cleanly and return the parse error to the user
defer func() {
Expand Down
6 changes: 3 additions & 3 deletions runtime/help/colors.md
Original file line number Diff line number Diff line change
Expand Up @@ -268,13 +268,13 @@ detect:
```

Micro will match this regex against a given filename to detect the filetype.
You may also provide an optional `header` regex that will check the first line
of the file. For example:
You may also provide an optional `signature` regex that is matched against a
limited number of lines of the file to find specific marks. For example:

```
detect:
filename: "\\.ya?ml$"
header: "%YAML"
signature: "%YAML"
```

### Syntax rules
Expand Down
7 changes: 7 additions & 0 deletions runtime/help/options.md
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,13 @@ Here are the available options:

default value: `true`

* `detectlimit`: if this is not set to 0, it limits the number of lines at the
start of a file that are matched to determine the filetype.
A higher limit makes the filetype guess more accurate, but detection takes
more time.

default value: `100`

* `diffgutter`: display diff indicators before lines.

default value: `false`
Expand Down
2 changes: 1 addition & 1 deletion runtime/syntax/PowerShell.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ filetype: powershell

detect:
filename: "\\.ps(1|m1|d1)$"
#header: ""
#signature: ""

rules:
# - comment.block: # Block Comment
Expand Down
2 changes: 1 addition & 1 deletion runtime/syntax/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

Here are micro's syntax files.

Each yaml file specifies how to detect the filetype based on file extension or headers (first line of the file).
Each yaml file specifies how to detect the filetype based on the file extension or a given signature. The signature is matched against all lines of the file, or only against the first lines up to the number set by the `detectlimit` option (to limit parse times), for a best "guess".
Then there are patterns and regions linked to highlight groups which tell micro how to highlight that filetype.

Making your own syntax files is very simple. I recommend you check the file after you are finished with the
Expand Down
2 changes: 1 addition & 1 deletion runtime/syntax/awk.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ filetype: awk

detect:
filename: "\\.awk$"
header: "^#!.*bin/(env +)?awk( |$)"
signature: "^#!.*bin/(env +)?awk( |$)"

rules:
- preproc: "\\$[A-Za-z0-9_!@#$*?\\-]+"
Expand Down
2 changes: 1 addition & 1 deletion runtime/syntax/bat.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ filetype: batch

detect:
filename: "(\\.bat$|\\.cmd$)"
# header: ""
# signature: ""

rules:
# Numbers
Expand Down
3 changes: 2 additions & 1 deletion runtime/syntax/cpp.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
filetype: c++

detect:
filename: "(\\.c(c|pp|xx)$|\\.h(h|pp|xx)$|\\.ii?$|\\.(def)$)"
filename: "(\\.c(c|pp|xx)$|\\.h(h|pp|xx)?$|\\.ii?$|\\.(def)$)"
signature: "namespace|template|public|protected|private"

rules:
- identifier: "\\b[A-Z_][0-9A-Z_]*\\b"
Expand Down
2 changes: 1 addition & 1 deletion runtime/syntax/crontab.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ filetype: crontab

detect:
filename: "crontab$"
header: "^#.*?/etc/crontab"
signature: "^#.*?/etc/crontab"

rules:
# The time and date fields are:
Expand Down
2 changes: 1 addition & 1 deletion runtime/syntax/csx.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
filetype: csharp-script
detect:
filename: "\\.csx$"
header: "^#!.*/(env +)?dotnet-script( |$)"
signature: "^#!.*/(env +)?dotnet-script( |$)"

rules:
- include: "csharp"
Expand Down
2 changes: 1 addition & 1 deletion runtime/syntax/fish.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ filetype: fish

detect:
filename: "\\.fish$"
header: "^#!.*/(env +)?fish( |$)"
signature: "^#!.*/(env +)?fish( |$)"

rules:
# Numbers
Expand Down
2 changes: 1 addition & 1 deletion runtime/syntax/godoc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ filetype: godoc

detect:
filename: "\\.godoc$"
header: package.*import
signature: package.*import

rules:
- preproc: "^[^ ].*"
Expand Down
2 changes: 1 addition & 1 deletion runtime/syntax/groovy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ filetype: groovy

detect:
filename: "(\\.(groovy|gy|gvy|gsh|gradle)$|^[Jj]enkinsfile$)"
header: "^#!.*/(env +)?groovy *$"
signature: "^#!.*/(env +)?groovy *$"

rules:
# And the style guide for constants is CONSTANT_CASE
Expand Down
2 changes: 1 addition & 1 deletion runtime/syntax/html4.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ filetype: html4

detect:
filename: "\\.htm[l]?4$"
header: "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN|http://www.w3.org/TR/html4/strict.dtd\">"
signature: "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN|http://www.w3.org/TR/html4/strict.dtd\">"

rules:
- error: "<[^!].*?>"
Expand Down
2 changes: 1 addition & 1 deletion runtime/syntax/html5.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ filetype: html5

detect:
filename: "\\.htm[l]?5$"
header: "<!DOCTYPE html5>"
signature: "<!DOCTYPE html5>"

rules:
- error: "<[^!].*?>"
Expand Down
Loading
Loading