Você está na página 1 de 27

{
  "settings": {
    "analysis": {
      "filter": {
        "brazilian_stop": {
          "type": "stop",
          "stopwords": "_brazilian_"
        },
        "brazilian_keywords": {
          "type": "keyword_marker",
          "keywords": []
        }
      },
      "analyzer": {
        "my_analyzer": {
          "tokenizer": "whitespace",
          "filter": ["lowercase", "asciifolding", "brazilian_stop", "brazilian_keywords"]
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "content": {
        "type": "text",
        "analyzer": "my_analyzer",
        "term_vector": "with_positions_offsets"
      },
      "file": {
        "properties": {
          "filename": { "type": "keyword", "store": true }
        }
      }
    }
  }
}

https://gist.github.com/alopes/5358189#file-stopwords-txt

-- INI_STOPWORDS --

br_stopwords
------------
https://github.com/apache/lucene-solr/blob/master/lucene/analysis/common/src/resources/org/apache/lucene/analysis/br/stopwords.txt

a
ainda
alem
ambas
ambos
antes
ao
aonde
aos
apos
aquele
aqueles
as
assim
com
como
contra
contudo
cuja
cujas
cujo
cujos
da
das
de
dela
dele
deles
demais
depois
desde
desta
deste
dispoe
dispoem
diversa
diversas
diversos
do
dos
durante
e
ela
elas
ele
eles
em
entao
entre
essa
essas
esse
esses
esta
estas
este
estes
ha
isso
isto
logo
mais
mas
mediante
menos
mesma
mesmas
mesmo
mesmos
na
nas
nao
nas
nem
nesse
neste
nos
o
os
ou
outra
outras
outro
outros
pelas
pelas
pelo
pelos
perante
pois
por
porque
portanto
proprio
propios
quais
qual
qualquer
quando
quanto
que
quem
quer
se
seja
sem
sendo
seu
seus
sob
sobre
sua
suas
tal
tambem
teu
teus
toda
todas
todo
todos
tua
tuas
tudo
um
uma
umas
uns

-- END_STOPWORDS --

ALTERNATIVA:

{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_analyzer": {
          "type": "custom",
          "tokenizer": "whitespace",
          "filter": ["lowercase", "asciifolding"]
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "content": {
        "type": "text",
        "analyzer": "my_analyzer",
        "term_vector": "with_positions_offsets"
      },
      "file": {
        "properties": {
          "filename": { "type": "keyword", "store": true }
        }
      }
    }
  }
}

ALTERNATIVA:

{
  "settings": {
    "analysis": {
      "filter": {
        "DEJT_remove_colons": {
          "type": "pattern_replace",
          "pattern": ",",
          "replacement": ""
        }
      },
      "analyzer": {
        "DEJT_analyzer": {
          "type": "custom",
          "tokenizer": "whitespace",
          "filter": ["lowercase", "asciifolding", "DEJT_remove_colons"]
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "content": {
        "type": "text",
        "analyzer": "DEJT_analyzer",
        "term_vector": "with_positions_offsets"
      },
      "file": {
        "properties": {
          "filename": { "type": "keyword", "store": true }
        }
      }
    }
  }
}
Alternativa:

{
  "settings": {
    "analysis": {
      "filter": {
        "DEJT_remove_colons": {
          "type": "pattern_replace",
          "pattern": "[,.]",
          "replacement": ""
        }
      },
      "analyzer": {
        "DEJT_analyzer": {
          "type": "custom",
          "tokenizer": "whitespace",
          "filter": ["lowercase", "asciifolding", "DEJT_remove_colons"]
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "content": {
        "type": "text",
        "analyzer": "DEJT_analyzer",
        "term_vector": "with_positions_offsets"
      },
      "file": {
        "properties": {
          "filename": { "type": "keyword", "store": true }
        }
      }
    }
  }
}

Alternativa:
{
  "settings": {
    "analysis": {
      "filter": {
        "brazilian_stop": {
          "type": "stop",
          "stopwords": "_brazilian_"
        },
        "brazilian_keywords": {
          "type": "keyword_marker",
          "keywords": [","]
        }
      },
      "char_filter": {
        "DEJT_char_filter": {
          "type": "mapping",
          "mappings": [", => ", ". => "]
        }
      },
      "analyzer": {
        "my_analyzer": {
          "tokenizer": "whitespace",
          "filter": ["lowercase", "asciifolding", "brazilian_stop", "brazilian_keywords"],
          "char_filter": ["DEJT_char_filter"]
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "content": {
        "type": "text",
        "analyzer": "my_analyzer",
        "term_vector": "with_positions_offsets"
      },
      "file": {
        "properties": {
          "filename": { "type": "keyword", "store": true }
        }
      }
    }
  }
}

Alternativa:

{
  "settings": {
    "analysis": {
      "filter": {
        "my_word_delimiter": {
          "type": "word_delimiter",
          "generate_word_parts": false,
          "generate_number_parts": false,
          "split_on_case_change": false,
          "split_on_numerics": false
        }
      },
      "analyzer": {
        "my_analyzer": {
          "tokenizer": "standard",
          "filter": ["lowercase", "asciifolding", "my_word_delimiter"]
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "content": {
        "type": "text",
        "analyzer": "my_analyzer",
        "term_vector": "with_positions_offsets"
      },
      "file": {
        "properties": {
          "filename": { "type": "keyword", "store": true }
        }
      }
    }
  }
}
Alternativa:

{
  "settings": {
    "analysis": {
      "filter": {
        "my_stop": {
          "type": "stop",
          "stopwords": [",", "."]
        }
      },
      "analyzer": {
        "my_analyzer": {
          "type": "custom",
          "tokenizer": "whitespace",
          "filter": ["lowercase", "asciifolding", "my_stop"]
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "content": {
        "type": "text",
        "analyzer": "my_analyzer",
        "term_vector": "with_positions_offsets"
      },
      "file": {
        "properties": {
          "filename": { "type": "keyword", "store": true }
        }
      }
    }
  }
}

Alternativa:

{
  "settings": {
    "analysis": {
      "filter": {
        "my_stop": {
          "type": "stop",
          "stopwords": [",", "."]
        }
      },
      "char_filter": {
        "my_char_filter": {
          "type": "mapping",
          "mappings": [". => ", ", => "]
        }
      },
      "analyzer": {
        "my_analyzer": {
          "type": "custom",
          "tokenizer": "whitespace",
          "filter": ["lowercase", "asciifolding", "my_stop"],
          "char_filter": ["my_char_filter"]
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "content": {
        "type": "text",
        "analyzer": "my_analyzer",
        "term_vector": "with_positions_offsets"
      },
      "file": {
        "properties": {
          "filename": { "type": "keyword", "store": true }
        }
      }
    }
  }
}

Alternativa:

{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_analyzer": {
          "type": "custom",
          "tokenizer": "my_tokenizer",
          "filter": ["lowercase", "asciifolding"]
        }
      },
      "tokenizer": {
        "my_tokenizer": {
          "type": "pattern",
          "pattern": "[,. ]"
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "content": {
        "type": "text",
        "analyzer": "my_analyzer",
        "term_vector": "with_positions_offsets"
      },
      "file": {
        "properties": {
          "filename": { "type": "keyword", "store": true }
        }
      }
    }
  }
}

TOKENIZER FC-05:

WordStop

Nova Linha -> \n


Espaço -> ' '
Vírgula: ,
Ponto: .

{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_analyzer": {
          "type": "custom",
          "tokenizer": "my_tokenizer",
          "filter": ["lowercase", "asciifolding"]
        }
      },
      "tokenizer": {
        "my_tokenizer": {
          "type": "pattern",
          "pattern": "[)(,.;\n ]"
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "content": {
        "type": "text",
        "analyzer": "my_analyzer",
        "term_vector": "with_positions_offsets"
      },
      "file": {
        "properties": {
          "filename": { "type": "keyword", "store": true }
        }
      }
    }
  }
}

-------
28/05

{
  "settings": {
    "analysis": {
      "filter": {
        "brazilian_stop": {
          "type": "stop",
          "stopwords": "_brazilian_"
        },
        "custom_stop": {
          "type": "stop",
          "stopwords": ["para"]
        },
        "brazilian_keywords": {
          "type": "keyword_marker",
          "keywords": []
        }
      },
      "analyzer": {
        "my_analyzer": {
          "type": "custom",
          "tokenizer": "my_tokenizer",
          "filter": ["lowercase", "asciifolding", "brazilian_stop", "custom_stop"]
        }
      },
      "tokenizer": {
        "my_tokenizer": {
          "type": "pattern",
          "pattern": "[)(,.;\n ]"
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "content": {
        "type": "text",
        "analyzer": "my_analyzer",
        "term_vector": "with_positions_offsets"
      },
      "file": {
        "properties": {
          "filename": { "type": "keyword", "store": true }
        }
      }
    }
  }
}

-----

{
  "settings": {
    "analysis": {
      "filter": {
        "brazilian_stop": {
          "type": "stop",
          "stopwords": "_brazilian_"
        },
        "custom_stop": {
          "type": "stop",
          "stopwords": ["para"]
        }
      },
      "analyzer": {
        "my_analyzer": {
          "type": "custom",
          "tokenizer": "my_tokenizer",
          "filter": ["lowercase", "asciifolding", "custom_stop"]
        }
      },
      "tokenizer": {
        "my_tokenizer": {
          "type": "pattern",
          "pattern": "[)(,.;\n ]"
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "content": {
        "type": "text",
        "analyzer": "my_analyzer",
        "term_vector": "with_positions_offsets"
      },
      "file": {
        "properties": {
          "filename": { "type": "keyword", "store": true }
        }
      }
    }
  }
}

---

{
  "settings": {
    "analysis": {
      "filter": {
        "brazilian_stop": {
          "type": "stop",
          "stopwords": "_brazilian_"
        },
        "custom_stop": {
          "type": "stop",
          "stopwords": ["para"]
        },
        "brazilian_keywords": {
          "type": "keyword_marker",
          "keywords": []
        }
      },
      "analyzer": {
        "my_analyzer": {
          "type": "custom",
          "tokenizer": "my_tokenizer",
          "filter": ["lowercase", "asciifolding", "brazilian_stop", "custom_stop"]
        }
      },
      "tokenizer": {
        "my_tokenizer": {
          "type": "pattern",
          "pattern": "[)(,.;\n ]"
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "content": {
        "type": "text",
        "analyzer": "my_analyzer",
        "term_vector": "with_positions_offsets"
      },
      "file": {
        "properties": {
          "filename": { "type": "keyword", "store": true }
        }
      }
    }
  }
}

--

{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_analyzer": {
          "type": "custom",
          "tokenizer": "my_tokenizer",
          "filter": ["lowercase", "asciifolding"]
        }
      },
      "tokenizer": {
        "my_tokenizer": {
          "type": "pattern",
          "pattern": "[)(,.;\n ]"
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "content": {
        "type": "text",
        "analyzer": "my_analyzer",
        "term_vector": "with_positions_offsets"
      },
      "file": {
        "properties": {
          "filename": { "type": "keyword", "store": true }
        }
      }
    }
  }
}

--

Ultimo:

{
  "settings": {
    "analysis": {
      "filter": {
        "brazilian_stop": {
          "type": "stop",
          "stopwords": "_brazilian_"
        },
        "custom_stop": {
          "type": "stop",
          "stopwords": ["para"]
        }
      },
      "analyzer": {
        "my_analyzer": {
          "type": "custom",
          "tokenizer": "my_tokenizer",
          "filter": ["lowercase", "asciifolding", "brazilian_stop", "custom_stop"]
        }
      },
      "tokenizer": {
        "my_tokenizer": {
          "type": "pattern",
          "pattern": "[)(,.;\n ]"
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "content": {
        "type": "text",
        "analyzer": "my_analyzer",
        "term_vector": "with_positions_offsets"
      },
      "file": {
        "properties": {
          "filename": { "type": "keyword", "store": true }
        }
      }
    }
  }
}

--

{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_analyzer": {
          "type": "custom",
          "tokenizer": "my_tokenizer",
          "filter": ["lowercase", "asciifolding"]
        }
      },
      "tokenizer": {
        "my_tokenizer": {
          "type": "pattern",
          "pattern": "[)(,.;\n ]"
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "content": {
        "type": "text",
        "analyzer": "my_analyzer",
        "term_vector": "with_positions_offsets"
      },
      "file": {
        "properties": {
          "filename": { "type": "keyword", "store": true }
        }
      }
    }
  }
}

---

Ultimo:

{
  "settings": {
    "analysis": {
      "filter": {
        "my_shingle_filter": {
          "type": "shingle",
          "min_shingle_size": 2,
          "max_shingle_size": 2,
          "output_unigrams": false
        },
        "brazilian_stop": {
          "type": "stop",
          "stopwords": "_brazilian_"
        },
        "custom_stop": {
          "type": "stop",
          "stopwords": ["para"]
        }
      },
      "analyzer": {
        "my_analyzer": {
          "type": "custom",
          "tokenizer": "my_tokenizer",
          "filter": [
            "lowercase",
            "asciifolding",
            "brazilian_stop",
            "custom_stop",
            "my_shingle_filter"
          ]
        }
      },
      "tokenizer": {
        "my_tokenizer": {
          "type": "pattern",
          "pattern": "[)(,.;\n ]"
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "content": {
        "type": "text",
        "analyzer": "my_analyzer",
        "term_vector": "with_positions_offsets"
      },
      "file": {
        "properties": {
          "filename": { "type": "keyword", "store": true }
        }
      }
    }
  }
}

--

ultimo:

{
  "settings": {
    "analysis": {
      "filter": {
        "my_shingle_filter": {
          "type": "shingle",
          "min_shingle_size": 3,
          "max_shingle_size": 5,
          "output_unigrams": true
        },
        "brazilian_stop": {
          "type": "stop",
          "stopwords": "_brazilian_"
        }
      },
      "analyzer": {
        "my_analyzer": {
          "type": "custom",
          "tokenizer": "my_tokenizer",
          "filter": ["lowercase", "asciifolding", "my_shingle_filter", "brazilian_stop"]
        }
      },
      "tokenizer": {
        "my_tokenizer": {
          "type": "pattern",
          "pattern": "[)(,.;\n ]"
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "content": {
        "type": "text",
        "analyzer": "my_analyzer",
        "term_vector": "with_positions_offsets"
      },
      "file": {
        "properties": {
          "filename": { "type": "keyword", "store": true }
        }
      }
    }
  }
}

--

Usar esse!!!!

{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_analyzer": {
          "type": "custom",
          "tokenizer": "my_tokenizer",
          "filter": ["lowercase", "asciifolding"]
        }
      },
      "tokenizer": {
        "my_tokenizer": {
          "type": "pattern",
          "pattern": "(?<=[\\p{L}\\d])([:;,.?!-])(?=[\\s\\n])|(?<=[\\p{L}])(\\/)(?=[\\p{L}])|(?<=[\\p{L}\\d])(\\,)(?=[\\p{L}\\s])|(?<=[\\p{L}\\d)(])(\\.)([\\s\\n]|$)|(?<=\\s)(-)(?=\\s)|([\\s\\(\\)\\[\\]\\{\\}])|(?<=[\\d)(])(\\/)(?=[\\d])|([\"])|([\\.\\_\\-]{2,})|([a-z])(?=[\\d]{5,6})"
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "content": {
        "type": "text",
        "analyzer": "my_analyzer",
        "term_vector": "with_positions_offsets"
      },
      "file": {
        "properties": {
          "filename": { "type": "keyword", "store": true }
        }
      }
    }
  }
}

(?<=[\\p{L}])(\\/)(?=[\\p{L}])|(?<=[\\p{L}\\d])(\\,)(?=[\\p{L}\\s])|(?<=[\\p{L}\\d])(\\:)(?=\\s)|(?<=[\\p{L}\\d])(\\.)([\\s\\n]|$)|(?<=\\s)(-)(?=\\s)|([\\s\\(\\)])

TODO: ao(à), interessado(a)


REGEX:

TODO: Incluir acentuação (talvez \p{L})


-> 1X. Captura a barra em caso de ocorrência no meio de palavra, ex: Brasilia/DF
(?<=[\p{L}])(\/)(?=[\p{L}])

-> 2X. Captura a vírgula somente se precedida de caractere ou dígito e sucedida de espaço ou caractere, ex: "erick andrade, que chegou.."
(?<=[\p{L}\d])(\,)(?=[\p{L}\s])

-> 3X. Captura dois-pontos (:), ponto-e-vírgula (;), vírgula (,) e ponto (.) no final de uma palavra ou número, ex: no valor total:
(?<=[\p{L}\d])([:;,.])(?=[\s\n])

-> 4X. Captura o ponto final depois de palavras e números, ex: Valor de R$
1.234,30.
(?<=[\p{L}\d)(])(\.)([\s\n]|$)

-> 5X. Captura hifen precedido e sucedido de espaço:


(?<=\s)(-)(?=\s)

-> 6X. Captura espaço + parenteses:


([\s\(\)])

Adições após dia 17/06:

Obs. A regra abaixo foi incorporada à regra 3


-> Capturar ponto-e-virgula no fim de palavra:
(?<=[\p{L}\d])(\;)(?=\s)

TODO TODO TODO: Não pega minha matrícula com final em ponto-e-virgula!!!!!!!!!!

(?<=[\p{L}\d])([:;,.?!-])(?=[\s\n])|(?<=[\p{L}])(\/)(?=[\p{L}])|(?<=[\p{L}\d])(\,)(?=[\p{L}\s])|(?<=[\p{L}\d)(])(\.)([\s\n]|$)|(?<=\s)(-)(?=\s)|([\s\(\)\[\]\{\}])|(?<=[\d)(])(\/)(?=[\d])|(["])|([\.\_\-]{2,})|([a-z])(?=[\d]{5,6})

REGEX Otimizadas:

Regra 1: Captura sinais de pontuação no fim de palavras ou números (e128414; por exemplo)
REGEX: (?<=[\p{L}\d])([:;,.?!-])(?=[\s\n])
REGEX JAVA: (?<=[\\p{L}\\d])([:;,.?!-])(?=[\\s\\n])

Regra 2: Captura a barra (/) quando estiver separando palavras (Brasilia/DF, por
exemplo)
REGEX: (?<=[\p{L}])(\/)(?=[\p{L}])
REGEX_JAVA: (?<=[\\p{L}])(\\/)(?=[\\p{L}])

Regra 3: Captura a vírgula quando estiver separando palavras, ou se a vírgula estiver separando um número seguido de palavra
REGEX: (?<=[\p{L}\d])(\,)(?=[\p{L}\s])
REGEX_JAVA: (?<=[\\p{L}\\d])(\\,)(?=[\\p{L}\\s])
Regra 4: Captura o ponto-final depois de palavras e números (sendo assim, sessao
encerrada. OU Valor de 12.323,32.)
REGEX: (?<=[\p{L}\d)(])(\.)([\s\n]|$)
REGEX_JAVA: (?<=[\\p{L}\\d)(])(\\.)([\\s\\n]|$)

Regra 5: Captura hífen precedido e sucedido de espaço


REGEX: (?<=\s)(-)(?=\s)
REGEX_JAVA: (?<=\\s)(-)(?=\\s)

Regra 6: Captura espaços em branco, chaves, parênteses e colchetes


REGEX: ([\s\(\)\[\]\{\}])
REGEX_JAVA: ([\\s\\(\\)\\[\\]\\{\\}])

Regra 7: Quebrar números que estiverem separados por barra (/): Lei nº 8.112/1992,
Lei 8.112/92, Lei 8.112..
REGEX: (?<=[\d)(])(\/)(?=[\d])
REGEX_JAVA: (?<=[\\d)(])(\\/)(?=[\\d])

Regra 8: Captura todas as aspas


REGEX: (["])
REGEX_JAVA: ([\"])

Regra 9: Captura pontos, underlines ou hífens seguidos (dois ou mais)


REGEX: ([\.\_\-]{2,})
REGEX_JAVA: ([\\.\\_\\-]{2,})

Regra 10: Ignorar letra da matrícula?


REGEX: ([a-z])(?=[\d]{5,6})
REGEX_JAVA: ([a-z])(?=[\\d]{5,6})

Regex usada no mapping:

(?<=[\\p{L}\\d])([:;,.?!-])(?=[\\s\\n])|(?<=[\\p{L}])(\\/)(?=[\\p{L}])|(?<=[\\p{L}\\d])(\\,)(?=[\\p{L}\\s])|(?<=[\\p{L}\\d)(])(\\.)([\\s\\n]|$)|(?<=\\s)(-)(?=\\s)|([\\s\\(\\)\\[\\]\\{\\}])|(?<=[\\d)(])(\\/)(?=[\\d])|([\"])|([\\.\\_\\-]{2,})|([a-z])(?=[\\d]{5,6})

Mapping PJE_Sentencas:

{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_analyzer": {
          "type": "custom",
          "tokenizer": "my_tokenizer",
          "filter": ["lowercase", "asciifolding"],
          "char_filter": ["html_strip"]
        }
      },
      "tokenizer": {
        "my_tokenizer": {
          "type": "pattern",
          "pattern": "(?<=[\\p{L}\\d])([:;,.?!-])(?=[\\s\\n])|(?<=[\\p{L}])(\\/)(?=[\\p{L}])|(?<=[\\p{L}\\d])(\\,)(?=[\\p{L}\\s])|(?<=[\\p{L}\\d)(])(\\.)([\\s\\n]|$)|(?<=\\s)(-)(?=\\s)|([\\s\\(\\)\\[\\]\\{\\}])|(?<=[\\d)(])(\\/)(?=[\\d])|([\"])|([\\.\\_\\-]{2,})|([a-z])(?=[\\d]{5,6})"
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "documento": {
        "type": "text",
        "analyzer": "my_analyzer",
        "term_vector": "with_positions_offsets"
      },
      "documento_parsed": {
        "type": "text",
        "analyzer": "my_analyzer",
        "term_vector": "with_positions_offsets"
      }
    }
  }
}

{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_analyzer": {
          "type": "custom",
          "tokenizer": "my_tokenizer",
          "filter": ["lowercase", "asciifolding"],
          "char_filter": ["html_strip"]
        }
      },
      "tokenizer": {
        "my_tokenizer": {
          "type": "pattern",
          "pattern": "(?<=[\\p{L}\\d])([:;,.?!-])(?=[\\s\\n])|(?<=[\\p{L}])(\\/)(?=[\\p{L}])|(?<=[\\p{L}\\d])(\\,)(?=[\\p{L}\\s])|(?<=[\\p{L}\\d)(])(\\.)([\\s\\n]|$)|(?<=\\s)(-)(?=\\s)|([\\s\\(\\)\\[\\]\\{\\}])|(?<=[\\d)(])(\\/)(?=[\\d])|([\"])|([\\.\\_\\-]{2,})|([a-z])(?=[\\d]{5,6})"
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "documento": {
        "type": "text",
        "analyzer": "my_analyzer",
        "term_vector": "with_positions_offsets"
      },
      "documento_parsed": {
        "type": "text",
        "analyzer": "my_analyzer",
        "term_vector": "with_positions_offsets"
      }
    }
  }
}

New REGEX:

1. Simulação de (\W+):
([^A-Za-z0-9\x{00C0}-\x{00FF}\x{00BA}\x{00AA}\/:.-]+)

2. Captura hífen ou ponto quando o caractere anterior não é letra nem dígito e o próximo é letra ou dígito
(?<=[^\p{L}\d])([-.])(?=[\p{L}\d])

3. Captura ponto, hifen, dois-pontos no fim de palavras/numeros:


(?<=[\p{L}\d\"])([.:-]+)(?=$|[^\p{L}\d])

4. Captura barra separando palavras


(?<=[\p{L}])([\/]+)(?=[\p{L}])
5. Remover barra no começo de palavras
(?<=[^\p{L}\d\/])([\/])(?=[\p{L}\d])

6. Remover ocorrências de ponto, underline, barra e hífen sozinhos ou repetidos


(?<=[^\p{L}\d])([._\/-]+)(?=[^\p{L}\d])

7. Separa dígitos com barra


(?<=[\d])(\/)(?=[\d])

8. Ignorar letra matrícula


([a-z])(?=[\d]{5,6})

Você também pode gostar