Sunday, December 18, 2011

Re: sh (bash) syntax for here-document strings: embedding other languages


syntax clear
syntax on

function! MatchHeredocType(filetype) abort
let s:ft_grouplist_name=toupper(a:filetype)

" compose a vim regular expression to match a line with a shell quote here-document redirection,
" followed by a vim modeline in any of the following 5 lines of text

" un-quoted user-defined delimitar for the here-document
" anything but a newline or a quote (though technically a quote can
" be present in a double-quoted delimiter, and reverse).
let s:regexp_heredoc_delim = '[^\n"' . "'" . ']+'

let s:regexp_heredoc_delim = '\w\+'

" the quoted delimiter
let s:regexp_quoted_delim = '\("\|' . "'" . '\)\(' . s:regexp_heredoc_delim . '\)\1' " external reference \z1

" the command line tail starting with the here-doc redirection and
" continued with other trailing redirections
let s:regexp_heredoc_redir_line = '<<-\?\s*' . s:regexp_quoted_delim . '\s*\%(.*\|\\\n\s*\)\n'

" match the heredoc redirection as leading conext or regular expression anchor
let s:regexp_redir_line_anchor = '\%(' . s:regexp_heredoc_redir_line . '\)\@<='

" maximum number of modelines to be checked, minus one
let s:regexp_modeline_candidates = '\%(.*\n\)\{-,4}'

" ':set' argument format (option[=value]) for modeline-like here document line
let s:regexp_set_argument = '\a\+\%(=[^\n\s:]*\)\?'

" modeline-like line introducer:
" '[TEXT...] vim: '...
" or
" 'TEXT... ex: '...
let s:regexp_modeline_introducer = '\%(\%(.*\s\+\)\?vim\?\|.*\S\+\s\+ex\):\s*'

" new style modeline: [TEXT] vim: op=val:op=val:...
let s:regexp_new_style_modeline = '\%(' . s:regexp_set_argument . ':\)*\%(ft\|filetype\)=' . a:filetype . '\%(:' . s:regexp_set_argument . '\)*\s*'

" old style modeline: TEXT... ex: set op=val op=val op=val: TEXT...
let s:regexp_old_style_modeline = 'set\?\s\+\%(' . s:regexp_set_argument . '\s\+\)*\%(ft\|filetype\)=' . a:filetype . '\%(\s\+' . s:regexp_set_argument . '\)*\s*:.*'

" modeline-like line
let s:regexp_modeline = s:regexp_modeline_introducer . '\%(' . s:regexp_new_style_modeline . '\|' . s:regexp_old_style_modeline . '\)$'

" here-document modeline matcher
let s:regexp_heredocument_modeline = s:regexp_redir_line_anchor " . s:regexp_modeline_candidates . s:regexp_modeline

if 0
echo "Region start: " s:regexp_heredocument_modeline
endif

exec 'syntax include @' . s:ft_grouplist_name . ' syntax/' . a:filetype. '.vim'
if exists('b:current_syntax')
unlet b:current_syntax
endif

" exec 'syntax region sh_' . a:filetype . '_heredoc keepend excludenl start=#' . s:regexp_heredocument_modeline . '# end=#\%(^\t*\z1$\)\@=# contained contains=@' . s:ft_grouplist_name . ' containedin=shHereDoc'
exec 'syntax match sh_' . a:filetype . '_heredoc keepend excludenl #' . s:regexp_heredocument_modeline . '\_.\{0,}\%(^\t*\2$\)\@=# contained contains=@' . s:ft_grouplist_name . ' containedin=shHereDoc'

endfunction

if exists('b:current_syntax')
let s:current_syntax=b:current_syntax
unlet b:current_syntax
endif

call MatchHeredocType('sh')
call MatchHeredocType('sed')
call MatchHeredocType('awk')
call MatchHeredocType('vim')
call MatchHeredocType('perl')
call MatchHeredocType('python')
call MatchHeredocType('ruby')
call MatchHeredocType('tcl')
call MatchHeredocType('dosbatch')
call MatchHeredocType('sql')
call MatchHeredocType('bc')
call MatchHeredocType('ipfilter')
call MatchHeredocType('php')


if exists('s:current_syntax')
let b:current_syntax=s:current_syntax
endif
On 18.12.2011 05:20, Andy Spencer wrote:
>> In a shell here-document there is no indication of the script language
>> for the embedded document
>
> That's not necessarily true, I've done something similar and highlighted
> awk commands as awk from inside a shell script. The trick is that `awk'
> usually shows up on the command line so you can match that and make a
> guess that the string is an awk script, which works good enough for me.
> Using \@<= helps with that from within syn-regions.
>
> For example:
> awk 'BEGIN { print "hello, world" }'
>
> Could be matched with something like:
> syn region shAwk start=+\v(awk.*)@<='+ end=+'+ ...
>
>
>> The wiki page with the tip also says (at the end) the highlighting does
>> not end where it should, and I happen to run into exactly this bug. :(
>
> I think adding `keepend' to the syntax line might help with this.
>
>
>> I also have another question: how do I make a syntax region that starts
>> with a possibly-quoted identifier, like:
>>
>> "script"-\delimit'er'
>>
>> and ends with the /necessarily/ the same *un-quoted* identifier:
>>
>> script_delimiter
>>
>> Is there a way to express this with vim syntax highlighting commands ?
>
> You can use \z(\) and \z1 in a syn-region, but it probably won't support
> crazy quotations. I would just recommend not putting crazy quotations in
> your here document identifiers.. After all, you probably don't want to
> make vim a full-blown bash interpreter.
>
>
>> Also, I would like to define a new syntax region that is contained in
>> the shHereDoc group (already defined in syntax/sh.vim), and that:
>> - start where the shHereDoc starts, if the shHereDoc starts
>> with the exact text '# vi:ft=sh' as one of the first 4 lines
>> - ends where the shHereDoc ends
>> Can I express this with vim syntax highlighting ?
>
> You can do the `first 4 lines part' part using `\ze(\n.*){1,4}PATTERN'.
> I'm not sure about the best way to match the start of an existing group
> though. I would probably just copy the start= pattern from sh.vim, but
> there may be a better way.
>
> syn region shAwk start=+<<\v\z(.*)\ze(\n.*){1,4}vim:.*ft\=awk+ end=+\z1+ ...

The \z1 notation does not work for me no matter how much I try ...
Only the first lines get highlighted (the lines matched by the start
pattern in syn region ... start=+start_script...+ Other than those
initial lines, I just get the original highlight from shHereDoc.

You have my vim syntax script attached. The RE is

:syntax list sh_php_heredoc
--- Syntax items ---
sh_php_heredoc xxx match
/\%(<<-\?\s*\("\|'\)\(\S\+\)\1\s*\%(.*\|\\\n\s*\)\n\)\@<=\%(.*\n\)\{-,4}\%(\%(.*\s\+\)\?vim\?\|.*\S\+\s\+ex\):\s*\%(\%(\a\+\%(=[^\n\
s:]*\)\?:\)*\%(ft\|filetype\)=php\%(:\a\+\%(=[^\n\s:]*\)\?\)*\s*\|set\?\s\+\%(\a\+\%(=[^\n\s:]*\)\?\s\+\)*\%(ft\|filetype\)=php\%(\s\+\a\+\%(=[^\n\s:]*\)\?\)
*\s*:.*\)$\_.\{0,}\%(^\t*\2$\)\@=/ contained keepend excludenl
contains=@PHP containedin=shHereDoc
Press ENTER or type command to continue

Still, at run-time, the end expression \2, which should be the delimiter
without the quotes, (\S\+, does not match the delimiter at the end of
the here-document.

Even if I try to get a shorter RE, still the backreferenced expesssion
is not matched properly:
\%(<<-\?\s*\("\|'\)\(\w\+\)\1\s*\%(.*\|\\\n\s*\)\n\)\@<=\_.\{0,}\%(^\t*\2$\)\@=

Is it possible vim has a bug with regular expressions ?

Thank you,
Timothy Madden

--
You received this message from the "vim_use" maillist.
Do not top-post! Type your reply below the text you are replying to.
For more information, visit http://www.vim.org/maillist.php

No comments: