From 433879046e18c79a6780080b233b04154582cbf7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6ran=20Karl?= <3951388+JoeKar@users.noreply.github.com> Date: Thu, 4 May 2023 23:48:42 +0200 Subject: [PATCH 1/5] Improve file detection with signature check capabilities This allows more complex detection upon regex rules for a certain amount of lines. --- internal/buffer/buffer.go | 54 ++++++++++++++++++++++++---- pkg/highlight/ftdetect.go | 18 ---------- pkg/highlight/parser.go | 56 +++++++++++++++++++++--------- runtime/help/colors.md | 6 ++-- runtime/syntax/PowerShell.yaml | 2 +- runtime/syntax/README.md | 2 +- runtime/syntax/awk.yaml | 2 +- runtime/syntax/bat.yaml | 2 +- runtime/syntax/crontab.yaml | 2 +- runtime/syntax/csx.yaml | 2 +- runtime/syntax/fish.yaml | 2 +- runtime/syntax/godoc.yaml | 2 +- runtime/syntax/groovy.yaml | 2 +- runtime/syntax/html4.yaml | 2 +- runtime/syntax/html5.yaml | 2 +- runtime/syntax/javascript.yaml | 2 +- runtime/syntax/json.yaml | 2 +- runtime/syntax/julia.yaml | 2 +- runtime/syntax/justfile.yaml | 2 +- runtime/syntax/mail.yaml | 2 +- runtime/syntax/make_headers.go | 17 ++++----- runtime/syntax/makefile.yaml | 2 +- runtime/syntax/nginx.yaml | 2 +- runtime/syntax/patch.yaml | 2 +- runtime/syntax/perl.yaml | 2 +- runtime/syntax/python2.yaml | 2 +- runtime/syntax/python3.yaml | 2 +- runtime/syntax/ruby.yaml | 2 +- runtime/syntax/sage.yaml | 2 +- runtime/syntax/sed.yaml | 2 +- runtime/syntax/sh.yaml | 2 +- runtime/syntax/syntax_converter.go | 2 +- runtime/syntax/systemd.yaml | 2 +- runtime/syntax/tcl.yaml | 2 +- runtime/syntax/xml.yaml | 2 +- runtime/syntax/yaml.yaml | 2 +- runtime/syntax/zsh.yaml | 2 +- 37 files changed, 130 insertions(+), 85 deletions(-) delete mode 100644 pkg/highlight/ftdetect.go diff --git a/internal/buffer/buffer.go b/internal/buffer/buffer.go index dc4d037f1..3e4fc7669 100644 --- a/internal/buffer/buffer.go +++ b/internal/buffer/buffer.go @@ -685,6 +685,16 @@ func (b *Buffer) UpdateRules() { if ft == "off" { return } 
+ + // syntaxFileBuffer is a helper structure + // to store properties of one single syntax file + type syntaxFileBuffer struct { + header *highlight.Header + fileName string + syntaxDef *highlight.Def + } + + syntaxFiles := []syntaxFileBuffer{} syntaxFile := "" foundDef := false var header *highlight.Header @@ -707,16 +717,21 @@ func (b *Buffer) UpdateRules() { continue } - if ((ft == "unknown" || ft == "") && highlight.MatchFiletype(header.FtDetect, b.Path, b.lines[0].data)) || header.FileType == ft { + if ((ft == "unknown" || ft == "") && header.MatchFileName(b.Path)) || header.FileType == ft { syndef, err := highlight.ParseDef(file, header) if err != nil { screen.TermMessage("Error parsing syntax file " + f.Name() + ": " + err.Error()) continue } - b.SyntaxDef = syndef - syntaxFile = f.Name() foundDef = true - break + + if header.FileType == ft { + b.SyntaxDef = syndef + syntaxFile = f.Name() + break + } else { + syntaxFiles = append(syntaxFiles, syntaxFileBuffer{header, f.Name(), syndef}) + } } } @@ -735,9 +750,8 @@ func (b *Buffer) UpdateRules() { } if ft == "unknown" || ft == "" { - if highlight.MatchFiletype(header.FtDetect, b.Path, b.lines[0].data) { - syntaxFile = f.Name() - break + if header.MatchFileName(b.Path) { + syntaxFiles = append(syntaxFiles, syntaxFileBuffer{header, f.Name(), nil}) } } else if header.FileType == ft { syntaxFile = f.Name() @@ -745,6 +759,32 @@ func (b *Buffer) UpdateRules() { } } + if syntaxFile == "" { + length := len(syntaxFiles) + if length > 0 { + signatureMatch := false + if length > 1 { + for i := 0; i < length && !signatureMatch; i++ { + if syntaxFiles[i].header.HasFileSignature() { + for j := 0; j < 100 && !signatureMatch; j++ { + if syntaxFiles[i].header.MatchFileSignature(b.lines[j].data) { + syntaxFile = syntaxFiles[i].fileName + b.SyntaxDef = syntaxFiles[i].syntaxDef + header = syntaxFiles[i].header + signatureMatch = true + } + } + } + } + } + if length == 1 || !signatureMatch { + syntaxFile = syntaxFiles[0].fileName 
+ b.SyntaxDef = syntaxFiles[0].syntaxDef + header = syntaxFiles[0].header + } + } + } + if syntaxFile != "" && !foundDef { // we found a syntax file using a syntax header file for _, f := range config.ListRuntimeFiles(config.RTSyntax) { diff --git a/pkg/highlight/ftdetect.go b/pkg/highlight/ftdetect.go deleted file mode 100644 index 580ade8fc..000000000 --- a/pkg/highlight/ftdetect.go +++ /dev/null @@ -1,18 +0,0 @@ -package highlight - -import "regexp" - -// MatchFiletype will use the list of syntax definitions provided and the filename and first line of the file -// to determine the filetype of the file -// It will return the corresponding syntax definition for the filetype -func MatchFiletype(ftdetect [2]*regexp.Regexp, filename string, firstLine []byte) bool { - if ftdetect[0] != nil && ftdetect[0].MatchString(filename) { - return true - } - - if ftdetect[1] != nil { - return ftdetect[1].Match(firstLine) - } - - return false -} diff --git a/pkg/highlight/parser.go b/pkg/highlight/parser.go index f46d8259d..92e290fe8 100644 --- a/pkg/highlight/parser.go +++ b/pkg/highlight/parser.go @@ -33,27 +33,26 @@ func (g Group) String() string { // Then it has the rules which define how to highlight the file type Def struct { *Header - rules *rules } type Header struct { - FileType string - FtDetect [2]*regexp.Regexp + FileType string + FileNameRegex *regexp.Regexp + SignatureRegex *regexp.Regexp } type HeaderYaml struct { FileType string `yaml:"filetype"` Detect struct { - FNameRgx string `yaml:"filename"` - HeaderRgx string `yaml:"header"` + FNameRegexStr string `yaml:"filename"` + SignatureRegexStr string `yaml:"signature"` } `yaml:"detect"` } type File struct { FileType string - - yamlSrc map[interface{}]interface{} + yamlSrc map[interface{}]interface{} } // A Pattern is one simple syntax rule @@ -103,14 +102,14 @@ func MakeHeader(data []byte) (*Header, error) { header := new(Header) var err error header.FileType = string(lines[0]) - fnameRgx := string(lines[1]) - 
headerRgx := string(lines[2]) + fnameRegexStr := string(lines[1]) + signatureRegexStr := string(lines[2]) - if fnameRgx != "" { - header.FtDetect[0], err = regexp.Compile(fnameRgx) + if fnameRegexStr != "" { + header.FileNameRegex, err = regexp.Compile(fnameRegexStr) } - if err == nil && headerRgx != "" { - header.FtDetect[1], err = regexp.Compile(headerRgx) + if err == nil && signatureRegexStr != "" { + header.SignatureRegex, err = regexp.Compile(signatureRegexStr) } if err != nil { @@ -132,11 +131,11 @@ func MakeHeaderYaml(data []byte) (*Header, error) { header := new(Header) header.FileType = hdrYaml.FileType - if hdrYaml.Detect.FNameRgx != "" { - header.FtDetect[0], err = regexp.Compile(hdrYaml.Detect.FNameRgx) + if hdrYaml.Detect.FNameRegexStr != "" { + header.FileNameRegex, err = regexp.Compile(hdrYaml.Detect.FNameRegexStr) } - if err == nil && hdrYaml.Detect.HeaderRgx != "" { - header.FtDetect[1], err = regexp.Compile(hdrYaml.Detect.HeaderRgx) + if err == nil && hdrYaml.Detect.SignatureRegexStr != "" { + header.SignatureRegex, err = regexp.Compile(hdrYaml.Detect.SignatureRegexStr) } if err != nil { @@ -146,6 +145,29 @@ func MakeHeaderYaml(data []byte) (*Header, error) { return header, nil } +// MatchFileName will check the given file name with the stored regex +func (header *Header) MatchFileName(filename string) bool { + if header.FileNameRegex != nil { + return header.FileNameRegex.MatchString(filename) + } + + return false +} + +// HasFileSignature checks the presence of a stored signature +func (header *Header) HasFileSignature() bool { + return header.SignatureRegex != nil +} + +// MatchFileSignature will check the given line with the stored regex +func (header *Header) MatchFileSignature(line []byte) bool { + if header.SignatureRegex != nil { + return header.SignatureRegex.Match(line) + } + + return false +} + func ParseFile(input []byte) (f *File, err error) { // This is just so if we have an error, we can exit cleanly and return the parse error to 
the user defer func() { diff --git a/runtime/help/colors.md b/runtime/help/colors.md index ac8b4be37..4a3ee7e0c 100644 --- a/runtime/help/colors.md +++ b/runtime/help/colors.md @@ -267,13 +267,13 @@ detect: ``` Micro will match this regex against a given filename to detect the filetype. -You may also provide an optional `header` regex that will check the first line -of the file. For example: +You may also provide an optional `signature` regex that is matched against a +number of lines at the start of the file to find specific marks. For example: ``` detect: filename: "\\.ya?ml$" - header: "%YAML" + signature: "%YAML" ``` ### Syntax rules diff --git a/runtime/syntax/PowerShell.yaml b/runtime/syntax/PowerShell.yaml index 7a45e426e..daaa2b210 100644 --- a/runtime/syntax/PowerShell.yaml +++ b/runtime/syntax/PowerShell.yaml @@ -5,7 +5,7 @@ filetype: powershell detect: filename: "\\.ps(1|m1|d1)$" - #header: "" + #signature: "" rules: # - comment.block: # Block Comment diff --git a/runtime/syntax/README.md b/runtime/syntax/README.md index 7d1a9de4c..e97a4f77e 100644 --- a/runtime/syntax/README.md +++ b/runtime/syntax/README.md @@ -2,7 +2,7 @@ Here are micro's syntax files. -Each yaml file specifies how to detect the filetype based on file extension or headers (first line of the file). +Each yaml file specifies how to detect the filetype based on file extension or given signature. The signature can be matched to a maximum of 100 lines (to limit parse times) for a best "guess". Then there are patterns and regions linked to highlight groups which tell micro how to highlight that filetype. Making your own syntax files is very simple. 
I recommend you check the file after you are finished with the diff --git a/runtime/syntax/awk.yaml b/runtime/syntax/awk.yaml index ff3f6988e..93ddf9ae9 100644 --- a/runtime/syntax/awk.yaml +++ b/runtime/syntax/awk.yaml @@ -2,7 +2,7 @@ filetype: awk detect: filename: "\\.awk$" - header: "^#!.*bin/(env +)?awk( |$)" + signature: "^#!.*bin/(env +)?awk( |$)" rules: - preproc: "\\$[A-Za-z0-9_!@#$*?\\-]+" diff --git a/runtime/syntax/bat.yaml b/runtime/syntax/bat.yaml index 2ef8d9875..741f74377 100644 --- a/runtime/syntax/bat.yaml +++ b/runtime/syntax/bat.yaml @@ -2,7 +2,7 @@ filetype: batch detect: filename: "(\\.bat$|\\.cmd$)" - # header: "" + # signature: "" rules: # Numbers diff --git a/runtime/syntax/crontab.yaml b/runtime/syntax/crontab.yaml index cebc7cad6..aec2e78ff 100644 --- a/runtime/syntax/crontab.yaml +++ b/runtime/syntax/crontab.yaml @@ -2,7 +2,7 @@ filetype: crontab detect: filename: "crontab$" - header: "^#.*?/etc/crontab" + signature: "^#.*?/etc/crontab" rules: # The time and date fields are: diff --git a/runtime/syntax/csx.yaml b/runtime/syntax/csx.yaml index a3a13a6c5..3710af178 100644 --- a/runtime/syntax/csx.yaml +++ b/runtime/syntax/csx.yaml @@ -1,7 +1,7 @@ filetype: csharp-script detect: filename: "\\.csx$" - header: "^#!.*/(env +)?dotnet-script( |$)" + signature: "^#!.*/(env +)?dotnet-script( |$)" rules: - include: "csharp" diff --git a/runtime/syntax/fish.yaml b/runtime/syntax/fish.yaml index 88798a04a..e50780970 100644 --- a/runtime/syntax/fish.yaml +++ b/runtime/syntax/fish.yaml @@ -2,7 +2,7 @@ filetype: fish detect: filename: "\\.fish$" - header: "^#!.*/(env +)?fish( |$)" + signature: "^#!.*/(env +)?fish( |$)" rules: # Numbers diff --git a/runtime/syntax/godoc.yaml b/runtime/syntax/godoc.yaml index b77262965..4aa1bc457 100644 --- a/runtime/syntax/godoc.yaml +++ b/runtime/syntax/godoc.yaml @@ -5,7 +5,7 @@ filetype: godoc detect: filename: "\\.godoc$" - header: package.*import + signature: package.*import rules: - preproc: "^[^ ].*" diff --git 
a/runtime/syntax/groovy.yaml b/runtime/syntax/groovy.yaml index 3aa0e2834..a19cdcd38 100644 --- a/runtime/syntax/groovy.yaml +++ b/runtime/syntax/groovy.yaml @@ -2,7 +2,7 @@ filetype: groovy detect: filename: "(\\.(groovy|gy|gvy|gsh|gradle)$|^[Jj]enkinsfile$)" - header: "^#!.*/(env +)?groovy *$" + signature: "^#!.*/(env +)?groovy *$" rules: # And the style guide for constants is CONSTANT_CASE diff --git a/runtime/syntax/html4.yaml b/runtime/syntax/html4.yaml index c132d61e9..a7cfae3f0 100644 --- a/runtime/syntax/html4.yaml +++ b/runtime/syntax/html4.yaml @@ -2,7 +2,7 @@ filetype: html4 detect: filename: "\\.htm[l]?4$" - header: "" + signature: "" rules: - error: "<[^!].*?>" diff --git a/runtime/syntax/html5.yaml b/runtime/syntax/html5.yaml index 411d53858..97bffde27 100644 --- a/runtime/syntax/html5.yaml +++ b/runtime/syntax/html5.yaml @@ -2,7 +2,7 @@ filetype: html5 detect: filename: "\\.htm[l]?5$" - header: "" + signature: "" rules: - error: "<[^!].*?>" diff --git a/runtime/syntax/javascript.yaml b/runtime/syntax/javascript.yaml index b2bfe4873..0b42caa69 100644 --- a/runtime/syntax/javascript.yaml +++ b/runtime/syntax/javascript.yaml @@ -2,7 +2,7 @@ filetype: javascript detect: filename: "(\\.js$|\\.es[5678]?$|\\.mjs$)" - header: "^#!.*/(env +)?node( |$)" + signature: "^#!.*/(env +)?node( |$)" rules: - constant.number: "\\b[-+]?([1-9][0-9]*|0[0-7]*|0x[0-9a-fA-F]+)([uU][lL]?|[lL][uU]?)?\\b" diff --git a/runtime/syntax/json.yaml b/runtime/syntax/json.yaml index c590bd388..35b483e9f 100644 --- a/runtime/syntax/json.yaml +++ b/runtime/syntax/json.yaml @@ -2,7 +2,7 @@ filetype: json detect: filename: "\\.json$" - header: "^\\{$" + signature: "^\\{$" rules: - constant.number: "\\b[-+]?([1-9][0-9]*|0[0-7]*|0x[0-9a-fA-F]+)([uU][lL]?|[lL][uU]?)?\\b" diff --git a/runtime/syntax/julia.yaml b/runtime/syntax/julia.yaml index c96ef0f34..8a46e5cf2 100644 --- a/runtime/syntax/julia.yaml +++ b/runtime/syntax/julia.yaml @@ -2,7 +2,7 @@ filetype: julia detect: filename: "\\.jl$" - 
header: "^#!.*/(env +)?julia( |$)" + signature: "^#!.*/(env +)?julia( |$)" rules: diff --git a/runtime/syntax/justfile.yaml b/runtime/syntax/justfile.yaml index 926edb21e..2a856edb8 100644 --- a/runtime/syntax/justfile.yaml +++ b/runtime/syntax/justfile.yaml @@ -3,7 +3,7 @@ filetype: 'justfile' detect: filename: "(^\\.?[Jj]ustfile|\\.just)$" - header: "^#!.*/(env +)?[bg]?just --justfile" + signature: "^#!.*/(env +)?[bg]?just --justfile" rules: - preproc: "\\<(ifeq|ifdef|ifneq|ifndef|else|endif)\\>" diff --git a/runtime/syntax/mail.yaml b/runtime/syntax/mail.yaml index 57aa03441..a400b28b7 100644 --- a/runtime/syntax/mail.yaml +++ b/runtime/syntax/mail.yaml @@ -2,7 +2,7 @@ filetype: mail detect: filename: "(.*/mutt-.*|\\.eml)$" - header: "^From .* \\d+:\\d+:\\d+ \\d+" + signature: "^From .* \\d+:\\d+:\\d+ \\d+" rules: - type: "^From .*" diff --git a/runtime/syntax/make_headers.go b/runtime/syntax/make_headers.go index e04d80627..c80c680e6 100644 --- a/runtime/syntax/make_headers.go +++ b/runtime/syntax/make_headers.go @@ -1,4 +1,5 @@ -//+build ignore +//go:build ignore +// +build ignore package main @@ -16,15 +17,15 @@ import ( type HeaderYaml struct { FileType string `yaml:"filetype"` Detect struct { - FNameRgx string `yaml:"filename"` - HeaderRgx string `yaml:"header"` + FNameRgx string `yaml:"filename"` + SignatureRgx string `yaml:"signature"` } `yaml:"detect"` } type Header struct { - FileType string - FNameRgx string - HeaderRgx string + FileType string + FNameRgx string + SignatureRgx string } func main() { @@ -58,7 +59,7 @@ func encode(name string, c HeaderYaml) { f, _ := os.Create(name + ".hdr") f.WriteString(c.FileType + "\n") f.WriteString(c.Detect.FNameRgx + "\n") - f.WriteString(c.Detect.HeaderRgx + "\n") + f.WriteString(c.Detect.SignatureRgx + "\n") f.Close() } @@ -69,7 +70,7 @@ func decode(name string) Header { var hdr Header hdr.FileType = string(strs[0]) hdr.FNameRgx = string(strs[1]) - hdr.HeaderRgx = string(strs[2]) + hdr.SignatureRgx = 
string(strs[2]) fmt.Printf("took %v\n", time.Since(start)) return hdr diff --git a/runtime/syntax/makefile.yaml b/runtime/syntax/makefile.yaml index 7e90cdeb7..670935fa7 100644 --- a/runtime/syntax/makefile.yaml +++ b/runtime/syntax/makefile.yaml @@ -2,7 +2,7 @@ filetype: makefile detect: filename: "([Mm]akefile|\\.ma?k)$" - header: "^#!.*/(env +)?[bg]?make( |$)" + signature: "^#!.*/(env +)?[bg]?make( |$)" rules: - preproc: "\\<(ifeq|ifdef|ifneq|ifndef|else|endif)\\>" diff --git a/runtime/syntax/nginx.yaml b/runtime/syntax/nginx.yaml index c2223b5a3..c35e213ef 100644 --- a/runtime/syntax/nginx.yaml +++ b/runtime/syntax/nginx.yaml @@ -2,7 +2,7 @@ filetype: nginx detect: filename: "nginx.*\\.conf$|\\.nginx$" - header: "^(server|upstream)[a-z ]*\\{$" + signature: "^(server|upstream)[a-z ]*\\{$" rules: - preproc: "\\b(events|server|http|location|upstream)[[:space:]]*\\{" diff --git a/runtime/syntax/patch.yaml b/runtime/syntax/patch.yaml index 996bdc38f..6275d4238 100644 --- a/runtime/syntax/patch.yaml +++ b/runtime/syntax/patch.yaml @@ -2,7 +2,7 @@ filetype: patch detect: filename: "\\.(patch|diff)$" - header: "^diff" + signature: "^diff" rules: - brightgreen: "^\\+.*" diff --git a/runtime/syntax/perl.yaml b/runtime/syntax/perl.yaml index 984cc24a2..5a8ad3cd3 100644 --- a/runtime/syntax/perl.yaml +++ b/runtime/syntax/perl.yaml @@ -2,7 +2,7 @@ filetype: perl detect: filename: "\\.p[lmp]$" - header: "^#!.*/(env +)?perl( |$)" + signature: "^#!.*/(env +)?perl( |$)" rules: - type: 
"\\b(accept|alarm|atan2|bin(d|mode)|c(aller|homp|h(dir|mod|op|own|root)|lose(dir)?|onnect|os|rypt)|d(bm(close|open)|efined|elete|ie|o|ump)|e(ach|of|val|x(ec|ists|it|p))|f(cntl|ileno|lock|ork))\\b|\\b(get(c|login|peername|pgrp|ppid|priority|pwnam|(host|net|proto|serv)byname|pwuid|grgid|(host|net)byaddr|protobynumber|servbyport)|([gs]et|end)(pw|gr|host|net|proto|serv)ent|getsock(name|opt)|gmtime|goto|grep|hex|index|int|ioctl|join)\\b|\\b(keys|kill|last|length|link|listen|local(time)?|log|lstat|m|mkdir|msg(ctl|get|snd|rcv)|next|oct|open(dir)?|ord|pack|pipe|pop|printf?|push|q|qq|qx|rand|re(ad(dir|link)?|cv|say|do|name|quire|set|turn|verse|winddir)|rindex|rmdir|s|scalar|seek(dir)?)\\b|\\b(se(lect|mctl|mget|mop|nd|tpgrp|tpriority|tsockopt)|shift|shm(ctl|get|read|write)|shutdown|sin|sleep|socket(pair)?|sort|spli(ce|t)|sprintf|sqrt|srand|stat|study|substr|symlink|sys(call|read|tem|write)|tell(dir)?|time|tr(y)?|truncate|umask)\\b|\\b(un(def|link|pack|shift)|utime|values|vec|wait(pid)?|wantarray|warn|write)\\b" diff --git a/runtime/syntax/python2.yaml b/runtime/syntax/python2.yaml index 3a993b056..42f7ffb4f 100644 --- a/runtime/syntax/python2.yaml +++ b/runtime/syntax/python2.yaml @@ -2,7 +2,7 @@ filetype: python2 detect: filename: "\\.py2$" - header: "^#!.*/(env +)?python2$" + signature: "^#!.*/(env +)?python2$" rules: diff --git a/runtime/syntax/python3.yaml b/runtime/syntax/python3.yaml index 5a060bff4..7e18df6e3 100644 --- a/runtime/syntax/python3.yaml +++ b/runtime/syntax/python3.yaml @@ -2,7 +2,7 @@ filetype: python detect: filename: "\\.py(3)?$" - header: "^#!.*/(env +)?python(3)?$" + signature: "^#!.*/(env +)?python(3)?$" rules: # built-in objects diff --git a/runtime/syntax/ruby.yaml b/runtime/syntax/ruby.yaml index f04593ce0..4a432d3ba 100644 --- a/runtime/syntax/ruby.yaml +++ b/runtime/syntax/ruby.yaml @@ -2,7 +2,7 @@ filetype: ruby detect: filename: 
"\\.(rb|rake|gemspec)$|^(.*[\\/])?(Gemfile|config.ru|Rakefile|Capfile|Vagrantfile|Guardfile|Appfile|Fastfile|Pluginfile|Podfile|\\.?[Bb]rewfile)$" - header: "^#!.*/(env +)?ruby( |$)" + signature: "^#!.*/(env +)?ruby( |$)" rules: - comment.bright: diff --git a/runtime/syntax/sage.yaml b/runtime/syntax/sage.yaml index e24fbeb60..8d2cb07a7 100644 --- a/runtime/syntax/sage.yaml +++ b/runtime/syntax/sage.yaml @@ -2,7 +2,7 @@ filetype: sage detect: filename: "\\.sage$" - header: "^#!.*/(env +)?sage( |$)" + signature: "^#!.*/(env +)?sage( |$)" rules: diff --git a/runtime/syntax/sed.yaml b/runtime/syntax/sed.yaml index dc5f7adc9..1b297c448 100644 --- a/runtime/syntax/sed.yaml +++ b/runtime/syntax/sed.yaml @@ -2,7 +2,7 @@ filetype: sed detect: filename: "\\.sed$" - header: "^#!.*bin/(env +)?sed( |$)" + signature: "^#!.*bin/(env +)?sed( |$)" rules: - symbol.operator: "[|^$.*+]" diff --git a/runtime/syntax/sh.yaml b/runtime/syntax/sh.yaml index ab47bf1d8..6c122de2e 100644 --- a/runtime/syntax/sh.yaml +++ b/runtime/syntax/sh.yaml @@ -24,7 +24,7 @@ filetype: shell # * bash-fc. 
(followed by a random string) detect: filename: "(\\.(sh|bash|ash|ebuild)$|(\\.bash(rc|_aliases|_functions|_profile)|\\.?profile|Pkgfile|pkgmk\\.conf|rc\\.conf|PKGBUILD|APKBUILD)$|bash-fc\\.)" - header: "^#!.*/(env +)?(ba)?(a)?(mk)?sh( |$)" + signature: "^#!.*/(env +)?(ba)?(a)?(mk)?sh( |$)" rules: # Numbers diff --git a/runtime/syntax/syntax_converter.go b/runtime/syntax/syntax_converter.go index d2954e3aa..f8af15dc6 100644 --- a/runtime/syntax/syntax_converter.go +++ b/runtime/syntax/syntax_converter.go @@ -137,7 +137,7 @@ func generateFile(filetype, syntax, header string, rules []interface{}) string { output += fmt.Sprintf("detect: \n filename: \"%s\"\n", strings.Replace(strings.Replace(syntax, "\\", "\\\\", -1), "\"", "\\\"", -1)) if header != "" { - output += fmt.Sprintf(" header: \"%s\"\n", strings.Replace(strings.Replace(header, "\\", "\\\\", -1), "\"", "\\\"", -1)) + output += fmt.Sprintf(" signature: \"%s\"\n", strings.Replace(strings.Replace(header, "\\", "\\\\", -1), "\"", "\\\"", -1)) } output += "\nrules:\n" diff --git a/runtime/syntax/systemd.yaml b/runtime/syntax/systemd.yaml index a8650be4b..9b6687762 100644 --- a/runtime/syntax/systemd.yaml +++ b/runtime/syntax/systemd.yaml @@ -2,7 +2,7 @@ filetype: systemd detect: filename: "\\.(service|socket|timer)$" - header: "^\\[Unit\\]$" + signature: "^\\[Unit\\]$" rules: - statement: 
"^(Accept|After|Alias|AllowIsolate|Also|ANSI_COLOR|_AUDIT_LOGINUID|_AUDIT_SESSION|Backlog|Before|BindIPv6Only|BindsTo|BindToDevice|BlockIOReadBandwidth|BlockIOWeight|BlockIOWriteBandwidth|_BOOT_ID|Broadcast|BUG_REPORT_URL|BusName|Capabilities|CapabilityBoundingSet|CHASSIS|cipher|class|_CMDLINE|CODE_FILE|CODE_FUNC|CODE_LINE|_COMM|Compress|ConditionACPower|ConditionCapability|ConditionDirectoryNotEmpty|ConditionFileIsExecutable|ConditionFileNotEmpty|ConditionHost|ConditionKernelCommandLine|ConditionNull|ConditionPathExists|ConditionPathExistsGlob|ConditionPathIsDirectory|ConditionPathIsMountPoint|ConditionPathIsReadWrite|ConditionPathIsSymbolicLink|ConditionSecurity|ConditionVirtualization|Conflicts|ControlGroup|ControlGroupAttribute|ControlGroupModify|ControlGroupPersistent|controllers|Controllers|CPE_NAME|CPUAffinity|CPUSchedulingPolicy|CPUSchedulingPriority|CPUSchedulingResetOnFork|CPUShares|CrashChVT|CrashShell|__CURSOR|debug|DefaultControllers|DefaultDependencies|DefaultLimitAS|DefaultLimitCORE|DefaultLimitCPU|DefaultLimitDATA|DefaultLimitFSIZE|DefaultLimitLOCKS|DefaultLimitMEMLOCK|DefaultLimitMSGQUEUE|DefaultLimitNICE|DefaultLimitNOFILE|DefaultLimitNPROC|DefaultLimitRSS|DefaultLimitRTPRIO|DefaultLimitRTTIME|DefaultLimitSIGPENDING|DefaultLimitSTACK|DefaultStandardError|DefaultStandardOutput|Description|DeviceAllow|DeviceDeny|DirectoryMode|DirectoryNotEmpty|Documentation|DumpCore|entropy|Environment|EnvironmentFile|ERRNO|event_timeout|_EXE|ExecReload|ExecStart|ExecStartPost|ExecStartPre|ExecStop|ExecStopPost|ExecStopPre|filter|FONT|FONT_MAP|FONT_UNIMAP|ForwardToConsole|ForwardToKMsg|ForwardToSyslog|FreeBind|freq|FsckPassNo|fstab|_GID|Group|GuessMainPID|HandleHibernateKey|HandleLidSwitch|HandlePowerKey|HandleSuspendKey|hash|HibernateKeyIgnoreInhibited|HOME_URL|_HOSTNAME|ICON_NAME|ID|IdleAction|IdleActionSec|ID_LIKE|ID_MODEL|ID_MODEL_FROM_DATABASE|IgnoreOnIsolate|IgnoreOnSnapshot|IgnoreSIGPIPE|InaccessibleDirectories|InhibitDelayMaxSec|init|IOSchedulingClass|IOSched
ulingPriority|IPTOS|IPTTL|JobTimeoutSec|JoinControllers|KeepAlive|KEYMAP|KEYMAP_TOGGLE|KillExcludeUsers|KillMode|KillOnlyUsers|KillSignal|KillUserProcesses|LidSwitchIgnoreInhibited|LimitAS|LimitCORE|LimitCPU|LimitDATA|LimitFSIZE|LimitLOCKS|LimitMEMLOCK|LimitMSGQUEUE|LimitNICE|LimitNOFILE|LimitNPROC|LimitRSS|LimitRTPRIO|LimitRTTIME|LimitSIGPENDING|LimitSTACK|link_priority|valueListenDatagram|ListenFIFO|ListenMessageQueue|ListenNetlink|ListenSequentialPacket|ListenSpecial|ListenStream|LogColor|LogLevel|LogLocation|LogTarget|luks|_MACHINE_ID|MakeDirectory|Mark|MaxConnections|MaxFileSec|MaxLevelConsole|MaxLevelKMsg|MaxLevelStore|MaxLevelSyslog|MaxRetentionSec|MemoryLimit|MemorySoftLimit|MESSAGE|MESSAGE_ID|MessageQueueMaxMessages|MessageQueueMessageSize|__MONOTONIC_TIMESTAMP|MountFlags|NAME|NAutoVTs|Nice|NonBlocking|NoNewPrivileges|NotifyAccess|OnActiveSec|OnBootSec|OnCalendar|OnFailure|OnFailureIsolate|OnStartupSec|OnUnitActiveSec|OnUnitInactiveSec|OOMScoreAdjust|Options|output|PAMName|PartOf|PassCredentials|PassSecurity|PathChanged|PathExists|PathExistsGlob|PathModified|PermissionsStartOnly|_PID|PIDFile|PipeSize|PowerKeyIgnoreInhibited|PRETTY_HOSTNAME|PRETTY_NAME|Priority|PRIORITY|PrivateNetwork|PrivateTmp|PropagatesReloadTo|pss|RateLimitBurst|RateLimitInterval|ReadOnlyDirectories|ReadWriteDirectories|__REALTIME_TIMESTAMP|ReceiveBuffer|RefuseManualStart|RefuseManualStop|rel|ReloadPropagatedFrom|RemainAfterExit|RequiredBy|Requires|RequiresMountsFor|RequiresOverridable|Requisite|RequisiteOverridable|ReserveVT|ResetControllers|Restart|RestartPreventExitStatus|RestartSec|RootDirectory|RootDirectoryStartOnly|RuntimeKeepFree|RuntimeMaxFileSize|RuntimeMaxUse|RuntimeWatchdogSec|samples|scale_x|scale_y|Seal|SecureBits|_SELINUX_CONTEXT|SendBuffer|SendSIGKILL|Service|ShowStatus|ShutdownWatchdogSec|size|SmackLabel|SmackLabelIPIn|SmackLabelIPOut|SocketMode|Sockets|SourcePath|_SOURCE_REALTIME_TIMESTAMP|SplitMode|StandardError|StandardInput|StandardOutput|StartLimitAction|StartLimitB
urst|StartLimitInterval|static_node|StopWhenUnneeded|Storage|string_escape|none|replaceSuccessExitStatus|SupplementaryGroups|SUPPORT_URL|SuspendKeyIgnoreInhibited|SyslogFacility|SYSLOG_FACILITY|SyslogIdentifier|SYSLOG_IDENTIFIER|SyslogLevel|SyslogLevelPrefix|SYSLOG_PID|SystemCallFilter|SYSTEMD_ALIAS|_SYSTEMD_CGROUP|_SYSTEMD_OWNER_UID|SYSTEMD_READY|_SYSTEMD_SESSION|_SYSTEMD_UNIT|_SYSTEMD_USER_UNIT|SYSTEMD_WANTS|SystemKeepFree|SystemMaxFileSize|SystemMaxUse|SysVStartPriority|TCPCongestion|TCPWrapName|timeout|TimeoutSec|TimeoutStartSec|TimeoutStopSec|TimerSlackNSec|Transparent|_TRANSPORT|tries|TTYPath|TTYReset|TTYVHangup|TTYVTDisallocate|Type|_UID|UMask|Unit|User|UtmpIdentifier|VERSION|VERSION_ID|WantedBy|Wants|WatchdogSec|What|Where|WorkingDirectory)=" diff --git a/runtime/syntax/tcl.yaml b/runtime/syntax/tcl.yaml index b87a7d790..1b4ae7e5c 100644 --- a/runtime/syntax/tcl.yaml +++ b/runtime/syntax/tcl.yaml @@ -2,7 +2,7 @@ filetype: tcl detect: filename: "\\.tcl$" - header: "^#!.*/(env +)?tclsh( |$)" + signature: "^#!.*/(env +)?tclsh( |$)" rules: - statement: "\\b(after|append|array|auto_execok|auto_import|auto_load|auto_load_index|auto_qualify|binary|break|case|catch|cd|clock|close|concat|continue|else|elseif|encoding|eof|error|eval|exec|exit|expr|fblocked|fconfigure|fcopy|file|fileevent|flush|for|foreach|format|gets|glob|global|history|if|incr|info|interp|join|lappend|lindex|linsert|list|llength|load|lrange|lreplace|lsearch|lset|lsort|namespace|open|package|pid|puts|pwd|read|regexp|regsub|rename|return|scan|seek|set|socket|source|split|string|subst|switch|tclLog|tell|time|trace|unknown|unset|update|uplevel|upvar|variable|vwait|while)\\b" diff --git a/runtime/syntax/xml.yaml b/runtime/syntax/xml.yaml index df4cde811..0e9b901e8 100644 --- a/runtime/syntax/xml.yaml +++ b/runtime/syntax/xml.yaml @@ -2,7 +2,7 @@ filetype: xml detect: filename: "\\.(xml|sgml?|rng|svg|plist)$" - header: "<\\?xml.*\\?>" + signature: "<\\?xml.*\\?>" rules: - preproc: diff --git 
a/runtime/syntax/yaml.yaml b/runtime/syntax/yaml.yaml index 54d4a6472..c21286e4f 100644 --- a/runtime/syntax/yaml.yaml +++ b/runtime/syntax/yaml.yaml @@ -2,7 +2,7 @@ filetype: yaml detect: filename: "\\.ya?ml$" - header: "%YAML" + signature: "%YAML" rules: - type: "(^| )!!(binary|bool|float|int|map|null|omap|seq|set|str) " diff --git a/runtime/syntax/zsh.yaml b/runtime/syntax/zsh.yaml index a28321310..3b7e05939 100644 --- a/runtime/syntax/zsh.yaml +++ b/runtime/syntax/zsh.yaml @@ -2,7 +2,7 @@ filetype: zsh detect: filename: "(\\.zsh$|\\.?(zshenv|zprofile|zshrc|zlogin|zlogout)$)" - header: "^#!.*/(env +)?zsh( |$)" + signature: "^#!.*/(env +)?zsh( |$)" rules: ## Numbers From 93151f81093f02838e4592b5b35a92af4728f9e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6ran=20Karl?= <3951388+JoeKar@users.noreply.github.com> Date: Fri, 8 Sep 2023 21:45:00 +0200 Subject: [PATCH 2/5] syntax: Prepare a concrete signature example for objective C --- runtime/syntax/objc.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/runtime/syntax/objc.yaml b/runtime/syntax/objc.yaml index 223d6929f..d4cccac31 100644 --- a/runtime/syntax/objc.yaml +++ b/runtime/syntax/objc.yaml @@ -2,6 +2,7 @@ filetype: objective-c detect: filename: "\\.(m|mm|h)$" + signature: "(obj|objective)-c|#import|@(encode|end|interface|implementation|selector|protocol|synchronized|try|catch|finally|property|optional|required|import|autoreleasepool)" rules: - type: "\\b(float|double|CGFloat|id|bool|BOOL|Boolean|char|int|short|long|sizeof|enum|void|static|const|struct|union|typedef|extern|(un)?signed|inline|Class|SEL|IMP|NS(U)?Integer)\\b" From 2aa386f4556e49853e5c889d1d104499354d175c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6ran=20Karl?= <3951388+JoeKar@users.noreply.github.com> Date: Thu, 26 Oct 2023 20:24:21 +0200 Subject: [PATCH 3/5] syntax: Prepare a concrete signature example for C++ --- runtime/syntax/cpp.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/runtime/syntax/cpp.yaml 
b/runtime/syntax/cpp.yaml index 3c97b2c44..e84c3cd45 100644 --- a/runtime/syntax/cpp.yaml +++ b/runtime/syntax/cpp.yaml @@ -1,7 +1,8 @@ filetype: c++ detect: - filename: "(\\.c(c|pp|xx)$|\\.h(h|pp|xx)$|\\.ii?$|\\.(def)$)" + filename: "(\\.c(c|pp|xx)$|\\.h(h|pp|xx)?$|\\.ii?$|\\.(def)$)" + signature: "namespace|template|public|protected|private" rules: - identifier: "\\b[A-Z_][0-9A-Z_]*\\b" From 2d0d0416e7725fd8db16dc93816269cce7ac797f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6ran=20Karl?= <3951388+JoeKar@users.noreply.github.com> Date: Fri, 8 Sep 2023 20:44:00 +0200 Subject: [PATCH 4/5] buffer: Prefer user defined over built-in file types --- internal/buffer/buffer.go | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/internal/buffer/buffer.go b/internal/buffer/buffer.go index 3e4fc7669..29dd823a3 100644 --- a/internal/buffer/buffer.go +++ b/internal/buffer/buffer.go @@ -735,27 +735,29 @@ func (b *Buffer) UpdateRules() { } } - // search in the default syntax files - for _, f := range config.ListRuntimeFiles(config.RTSyntaxHeader) { - data, err := f.Data() - if err != nil { - screen.TermMessage("Error loading syntax header file " + f.Name() + ": " + err.Error()) - continue - } + if !foundDef { + // search in the default syntax files + for _, f := range config.ListRuntimeFiles(config.RTSyntaxHeader) { + data, err := f.Data() + if err != nil { + screen.TermMessage("Error loading syntax header file " + f.Name() + ": " + err.Error()) + continue + } - header, err = highlight.MakeHeader(data) - if err != nil { - screen.TermMessage("Error reading syntax header file", f.Name(), err) - continue - } + header, err = highlight.MakeHeader(data) + if err != nil { + screen.TermMessage("Error reading syntax header file", f.Name(), err) + continue + } - if ft == "unknown" || ft == "" { - if header.MatchFileName(b.Path) { - syntaxFiles = append(syntaxFiles, syntaxFileBuffer{header, f.Name(), nil}) + if ft == "unknown" || ft 
== "" { + if header.MatchFileName(b.Path) { + syntaxFiles = append(syntaxFiles, syntaxFileBuffer{header, f.Name(), nil}) + } + } else if header.FileType == ft { + syntaxFile = f.Name() + break } - } else if header.FileType == ft { - syntaxFile = f.Name() - break } } From 3c16df87ee622ed2e526896957da98234e081dad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6ran=20Karl?= <3951388+JoeKar@users.noreply.github.com> Date: Tue, 6 Jun 2023 21:59:35 +0200 Subject: [PATCH 5/5] options: Add capability to define the line count parsed for the signature check --- internal/buffer/buffer.go | 8 +++++++- internal/config/settings.go | 2 ++ runtime/help/options.md | 7 +++++++ runtime/syntax/README.md | 2 +- 4 files changed, 17 insertions(+), 2 deletions(-) diff --git a/internal/buffer/buffer.go b/internal/buffer/buffer.go index 29dd823a3..bd172d582 100644 --- a/internal/buffer/buffer.go +++ b/internal/buffer/buffer.go @@ -766,9 +766,15 @@ func (b *Buffer) UpdateRules() { if length > 0 { signatureMatch := false if length > 1 { + detectlimit := util.IntOpt(b.Settings["detectlimit"]) + lineCount := len(b.lines) + limit := lineCount + if detectlimit > 0 && lineCount > detectlimit { + limit = detectlimit + } for i := 0; i < length && !signatureMatch; i++ { if syntaxFiles[i].header.HasFileSignature() { - for j := 0; j < 100 && !signatureMatch; j++ { + for j := 0; j < limit && !signatureMatch; j++ { if syntaxFiles[i].header.MatchFileSignature(b.lines[j].data) { syntaxFile = syntaxFiles[i].fileName b.SyntaxDef = syntaxFiles[i].syntaxDef diff --git a/internal/config/settings.go b/internal/config/settings.go index 72e998f14..eca52074a 100644 --- a/internal/config/settings.go +++ b/internal/config/settings.go @@ -44,6 +44,7 @@ func init() { var optionValidators = map[string]optionValidator{ "autosave": validateNonNegativeValue, "clipboard": validateClipboard, + "detectlimit": validateNonNegativeValue, "tabsize": validatePositiveValue, "scrollmargin": validateNonNegativeValue, "scrollspeed": 
validateNonNegativeValue, @@ -280,6 +281,7 @@ var defaultCommonSettings = map[string]interface{}{ "basename": false, "colorcolumn": float64(0), "cursorline": true, + "detectlimit": float64(100), "diffgutter": false, "encoding": "utf-8", "eofnewline": true, diff --git a/runtime/help/options.md b/runtime/help/options.md index 3170dc4c2..376efdb52 100644 --- a/runtime/help/options.md +++ b/runtime/help/options.md @@ -100,6 +100,13 @@ Here are the available options: default value: `true` +* `detectlimit`: if this is not set to 0, it limits the number of lines at the + start of a file that are checked to determine the filetype. + A higher limit makes the filetype guess more accurate, but detection + takes more time. + + default value: `100` + * `diffgutter`: display diff indicators before lines. default value: `false` diff --git a/runtime/syntax/README.md b/runtime/syntax/README.md index e97a4f77e..5bcbf1380 100644 --- a/runtime/syntax/README.md +++ b/runtime/syntax/README.md @@ -2,7 +2,7 @@ Here are micro's syntax files. -Each yaml file specifies how to detect the filetype based on file extension or given signature. The signature can be matched to a maximum of 100 lines (to limit parse times) for a best "guess". +Each yaml file specifies how to detect the filetype based on the file extension or a given signature. The signature is matched against every line of the file, or only against the first `detectlimit` lines when that option is set to a non-zero value (to limit parse times), for a best "guess". Then there are patterns and regions linked to highlight groups which tell micro how to highlight that filetype. Making your own syntax files is very simple. I recommend you check the file after you are finished with the