Skip to content

Commit

Permalink
Merge pull request #2819 from JoeKar/fix/file-detection
Browse files Browse the repository at this point in the history
Improve file detection with signature check capabilities
  • Loading branch information
dmaluka authored Mar 14, 2024
2 parents 3dba23a + 5bfda7b commit e424537
Show file tree
Hide file tree
Showing 41 changed files with 164 additions and 100 deletions.
90 changes: 69 additions & 21 deletions internal/buffer/buffer.go
Original file line number Diff line number Diff line change
Expand Up @@ -692,6 +692,16 @@ func (b *Buffer) UpdateRules() {
if ft == "off" {
return
}

// syntaxFileBuffer is a helper structure that stores the properties
// of one single syntax file which is a candidate for this buffer
type syntaxFileBuffer struct {
// header is the parsed header of the syntax file
header *highlight.Header
// fileName is the name of the syntax file, as returned by f.Name()
fileName string
// syntaxDef is the parsed syntax definition, if one was already parsed;
// it is nil for candidates collected from the default syntax header files
syntaxDef *highlight.Def
}

syntaxFiles := []syntaxFileBuffer{}
syntaxFile := ""
foundDef := false
var header *highlight.Header
Expand All @@ -714,41 +724,79 @@ func (b *Buffer) UpdateRules() {
continue
}

if ((ft == "unknown" || ft == "") && highlight.MatchFiletype(header.FtDetect, b.Path, b.lines[0].data)) || header.FileType == ft {
if ((ft == "unknown" || ft == "") && header.MatchFileName(b.Path)) || header.FileType == ft {
syndef, err := highlight.ParseDef(file, header)
if err != nil {
screen.TermMessage("Error parsing syntax file " + f.Name() + ": " + err.Error())
continue
}
b.SyntaxDef = syndef
syntaxFile = f.Name()
foundDef = true
break

if header.FileType == ft {
b.SyntaxDef = syndef
syntaxFile = f.Name()
break
} else {
syntaxFiles = append(syntaxFiles, syntaxFileBuffer{header, f.Name(), syndef})
}
}
}

// search in the default syntax files
for _, f := range config.ListRuntimeFiles(config.RTSyntaxHeader) {
data, err := f.Data()
if err != nil {
screen.TermMessage("Error loading syntax header file " + f.Name() + ": " + err.Error())
continue
}
if !foundDef {
// search in the default syntax files
for _, f := range config.ListRuntimeFiles(config.RTSyntaxHeader) {
data, err := f.Data()
if err != nil {
screen.TermMessage("Error loading syntax header file " + f.Name() + ": " + err.Error())
continue
}

header, err = highlight.MakeHeader(data)
if err != nil {
screen.TermMessage("Error reading syntax header file", f.Name(), err)
continue
}
header, err = highlight.MakeHeader(data)
if err != nil {
screen.TermMessage("Error reading syntax header file", f.Name(), err)
continue
}

if ft == "unknown" || ft == "" {
if highlight.MatchFiletype(header.FtDetect, b.Path, b.lines[0].data) {
if ft == "unknown" || ft == "" {
if header.MatchFileName(b.Path) {
syntaxFiles = append(syntaxFiles, syntaxFileBuffer{header, f.Name(), nil})
}
} else if header.FileType == ft {
syntaxFile = f.Name()
break
}
} else if header.FileType == ft {
syntaxFile = f.Name()
break
}
}

if syntaxFile == "" {
length := len(syntaxFiles)
if length > 0 {
signatureMatch := false
if length > 1 {
detectlimit := util.IntOpt(b.Settings["detectlimit"])
lineCount := len(b.lines)
limit := lineCount
if detectlimit > 0 && lineCount > detectlimit {
limit = detectlimit
}
for i := 0; i < length && !signatureMatch; i++ {
if syntaxFiles[i].header.HasFileSignature() {
for j := 0; j < limit && !signatureMatch; j++ {
if syntaxFiles[i].header.MatchFileSignature(b.lines[j].data) {
syntaxFile = syntaxFiles[i].fileName
b.SyntaxDef = syntaxFiles[i].syntaxDef
header = syntaxFiles[i].header
signatureMatch = true
}
}
}
}
}
if length == 1 || !signatureMatch {
syntaxFile = syntaxFiles[0].fileName
b.SyntaxDef = syntaxFiles[0].syntaxDef
header = syntaxFiles[0].header
}
}
}

Expand Down
2 changes: 2 additions & 0 deletions internal/config/settings.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ func init() {
var optionValidators = map[string]optionValidator{
"autosave": validateNonNegativeValue,
"clipboard": validateClipboard,
"detectlimit": validateNonNegativeValue,
"tabsize": validatePositiveValue,
"scrollmargin": validateNonNegativeValue,
"scrollspeed": validateNonNegativeValue,
Expand Down Expand Up @@ -282,6 +283,7 @@ var defaultCommonSettings = map[string]interface{}{
"basename": false,
"colorcolumn": float64(0),
"cursorline": true,
"detectlimit": float64(100),
"diffgutter": false,
"encoding": "utf-8",
"eofnewline": true,
Expand Down
18 changes: 0 additions & 18 deletions pkg/highlight/ftdetect.go

This file was deleted.

56 changes: 39 additions & 17 deletions pkg/highlight/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,27 +33,26 @@ func (g Group) String() string {
// Then it has the rules which define how to highlight the file
type Def struct {
*Header

rules *rules
}

type Header struct {
FileType string
FtDetect [2]*regexp.Regexp
FileType string
FileNameRegex *regexp.Regexp
SignatureRegex *regexp.Regexp
}

type HeaderYaml struct {
FileType string `yaml:"filetype"`
Detect struct {
FNameRgx string `yaml:"filename"`
HeaderRgx string `yaml:"header"`
FNameRegexStr string `yaml:"filename"`
SignatureRegexStr string `yaml:"signature"`
} `yaml:"detect"`
}

type File struct {
FileType string

yamlSrc map[interface{}]interface{}
yamlSrc map[interface{}]interface{}
}

// A Pattern is one simple syntax rule
Expand Down Expand Up @@ -103,14 +102,14 @@ func MakeHeader(data []byte) (*Header, error) {
header := new(Header)
var err error
header.FileType = string(lines[0])
fnameRgx := string(lines[1])
headerRgx := string(lines[2])
fnameRegexStr := string(lines[1])
signatureRegexStr := string(lines[2])

if fnameRgx != "" {
header.FtDetect[0], err = regexp.Compile(fnameRgx)
if fnameRegexStr != "" {
header.FileNameRegex, err = regexp.Compile(fnameRegexStr)
}
if err == nil && headerRgx != "" {
header.FtDetect[1], err = regexp.Compile(headerRgx)
if err == nil && signatureRegexStr != "" {
header.SignatureRegex, err = regexp.Compile(signatureRegexStr)
}

if err != nil {
Expand All @@ -132,11 +131,11 @@ func MakeHeaderYaml(data []byte) (*Header, error) {
header := new(Header)
header.FileType = hdrYaml.FileType

if hdrYaml.Detect.FNameRgx != "" {
header.FtDetect[0], err = regexp.Compile(hdrYaml.Detect.FNameRgx)
if hdrYaml.Detect.FNameRegexStr != "" {
header.FileNameRegex, err = regexp.Compile(hdrYaml.Detect.FNameRegexStr)
}
if err == nil && hdrYaml.Detect.HeaderRgx != "" {
header.FtDetect[1], err = regexp.Compile(hdrYaml.Detect.HeaderRgx)
if err == nil && hdrYaml.Detect.SignatureRegexStr != "" {
header.SignatureRegex, err = regexp.Compile(hdrYaml.Detect.SignatureRegexStr)
}

if err != nil {
Expand All @@ -146,6 +145,29 @@ func MakeHeaderYaml(data []byte) (*Header, error) {
return header, nil
}

// MatchFileName reports whether the given file name matches the file name
// regex stored in the header. A header without a file name regex never
// matches.
func (header *Header) MatchFileName(filename string) bool {
	if header.FileNameRegex == nil {
		return false
	}

	return header.FileNameRegex.MatchString(filename)
}

// HasFileSignature reports whether a signature regex is stored in the header.
func (header *Header) HasFileSignature() bool {
	if header.SignatureRegex == nil {
		return false
	}
	return true
}

// MatchFileSignature reports whether the given line matches the signature
// regex stored in the header. A header without a signature regex never
// matches.
func (header *Header) MatchFileSignature(line []byte) bool {
	if !header.HasFileSignature() {
		return false
	}

	return header.SignatureRegex.Match(line)
}

func ParseFile(input []byte) (f *File, err error) {
// This is just so if we have an error, we can exit cleanly and return the parse error to the user
defer func() {
Expand Down
6 changes: 3 additions & 3 deletions runtime/help/colors.md
Original file line number Diff line number Diff line change
Expand Up @@ -268,13 +268,13 @@ detect:
```

Micro will match this regex against a given filename to detect the filetype.
You may also provide an optional `header` regex that will check the first line
of the file. For example:
You may also provide an optional `signature` regex that will be matched against
a certain number of lines of the file to find specific marks. For example:

```
detect:
filename: "\\.ya?ml$"
header: "%YAML"
signature: "%YAML"
```

### Syntax rules
Expand Down
7 changes: 7 additions & 0 deletions runtime/help/options.md
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,13 @@ Here are the available options:

default value: `true`

* `detectlimit`: if this is not set to 0, it limits the number of lines at the
start of a file that are matched to determine the filetype.
A higher limit means more accurate filetype detection, but also takes more
time.

default value: `100`

* `diffgutter`: display diff indicators before lines.

default value: `false`
Expand Down
2 changes: 1 addition & 1 deletion runtime/syntax/PowerShell.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ filetype: powershell

detect:
filename: "\\.ps(1|m1|d1)$"
#header: ""
#signature: ""

rules:
# - comment.block: # Block Comment
Expand Down
2 changes: 1 addition & 1 deletion runtime/syntax/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

Here are micro's syntax files.

Each yaml file specifies how to detect the filetype based on file extension or headers (first line of the file).
Each yaml file specifies how to detect the filetype based on the file extension or a given signature. The signature is matched against all available lines of the file, or only against the first lines up to the number set by the `detectlimit` option (to limit parse times), for a best "guess".
Then there are patterns and regions linked to highlight groups which tell micro how to highlight that filetype.

Making your own syntax files is very simple. I recommend you check the file after you are finished with the
Expand Down
2 changes: 1 addition & 1 deletion runtime/syntax/awk.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ filetype: awk

detect:
filename: "\\.awk$"
header: "^#!.*bin/(env +)?awk( |$)"
signature: "^#!.*bin/(env +)?awk( |$)"

rules:
- preproc: "\\$[A-Za-z0-9_!@#$*?\\-]+"
Expand Down
2 changes: 1 addition & 1 deletion runtime/syntax/bat.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ filetype: batch

detect:
filename: "(\\.bat$|\\.cmd$)"
# header: ""
# signature: ""

rules:
# Numbers
Expand Down
3 changes: 2 additions & 1 deletion runtime/syntax/cpp.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
filetype: c++

detect:
filename: "(\\.c(c|pp|xx)$|\\.h(h|pp|xx)$|\\.ii?$|\\.(def)$)"
filename: "(\\.c(c|pp|xx)$|\\.h(h|pp|xx)?$|\\.ii?$|\\.(def)$)"
signature: "namespace|template|public|protected|private"

rules:
- identifier: "\\b[A-Z_][0-9A-Z_]*\\b"
Expand Down
2 changes: 1 addition & 1 deletion runtime/syntax/crontab.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ filetype: crontab

detect:
filename: "crontab$"
header: "^#.*?/etc/crontab"
signature: "^#.*?/etc/crontab"

rules:
# The time and date fields are:
Expand Down
2 changes: 1 addition & 1 deletion runtime/syntax/csx.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
filetype: csharp-script
detect:
filename: "\\.csx$"
header: "^#!.*/(env +)?dotnet-script( |$)"
signature: "^#!.*/(env +)?dotnet-script( |$)"

rules:
- include: "csharp"
Expand Down
2 changes: 1 addition & 1 deletion runtime/syntax/fish.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ filetype: fish

detect:
filename: "\\.fish$"
header: "^#!.*/(env +)?fish( |$)"
signature: "^#!.*/(env +)?fish( |$)"

rules:
# Numbers
Expand Down
2 changes: 1 addition & 1 deletion runtime/syntax/godoc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ filetype: godoc

detect:
filename: "\\.godoc$"
header: package.*import
signature: package.*import

rules:
- preproc: "^[^ ].*"
Expand Down
2 changes: 1 addition & 1 deletion runtime/syntax/groovy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ filetype: groovy

detect:
filename: "(\\.(groovy|gy|gvy|gsh|gradle)$|^[Jj]enkinsfile$)"
header: "^#!.*/(env +)?groovy *$"
signature: "^#!.*/(env +)?groovy *$"

rules:
# And the style guide for constants is CONSTANT_CASE
Expand Down
2 changes: 1 addition & 1 deletion runtime/syntax/html4.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ filetype: html4

detect:
filename: "\\.htm[l]?4$"
header: "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN|http://www.w3.org/TR/html4/strict.dtd\">"
signature: "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN|http://www.w3.org/TR/html4/strict.dtd\">"

rules:
- error: "<[^!].*?>"
Expand Down
2 changes: 1 addition & 1 deletion runtime/syntax/html5.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ filetype: html5

detect:
filename: "\\.htm[l]?5$"
header: "<!DOCTYPE html5>"
signature: "<!DOCTYPE html5>"

rules:
- error: "<[^!].*?>"
Expand Down
Loading

0 comments on commit e424537

Please sign in to comment.