kregex
0.4.0 · indexed · Readable, type-safe DSL for building regular expressions with full regex syntax, named captures, backreferences, Unicode support, pre-built patterns, pattern debugging, zero-dependency.
Readable, type-safe DSL for building regular expressions with full regex syntax, named captures, backreferences, Unicode support, pre-built patterns, pattern debugging, zero-dependency.
Kregex is a Kotlin Multiplatform DSL library for building regular expressions in a readable, type-safe, and maintainable way.
Instead of writing cryptic regex patterns like ^(?:[a-zA-Z0-9._%-]+)@(?:[a-zA-Z0-9-]+)\.(?:[a-zA-Z]{2,6})$, you can express them as clear, self-documenting Kotlin code.
implementation("io.github.bedshanty:kregex-jvm:0.4.0")
implementation 'io.github.bedshanty:kregex-jvm:0.4.0'
<dependency>
<groupId>io.github.bedshanty</groupId>
<artifactId>kregex-jvm</artifactId>
<version>0.4.0</version>
</dependency>
// commonMain
implementation()
implementation()
implementation()
implementation()
implementation()
implementation()
implementation()
implementation()
implementation()
implementation()
// Simple pattern
val digitPattern = regex {
line {
oneOrMore { digit() }
}
}
println(digitPattern.matches("12345")) // true
println(digitPattern.matches("abc")) // false
// #[0-9A-Fa-f]{6}
val hexColorPattern = regex {
literal("#")
repeat(6) { hexDigit() }
}
println(hexColorPattern.matches("#FF5733")) // true
println(hexColorPattern.matches("#abc123")) // true
println(hexColorPattern.matches("#GHIJKL")) // false
// ^(?:\+(?:\d){1,3}[ -])?(?:\d){3}[ -](?:\d){3,4}[ -](?:\d){4}$
val phonePattern = regex {
line {
optional {
literal("+")
repeat(1, 3) { digit() }
anyOf(" -")
}
repeat(3) { digit() }
anyOf(" -")
repeat(3, 4) { digit() }
anyOf(" -")
repeat(4) { digit() }
}
}
println(phonePattern.matches())
println(phonePattern.matches())
println(phonePattern.matches())
println(phonePattern.matches())
// ^(?=.*[a-z])(?=.*[A-Z])(?=.*[0-9])(?=.*[!@#$%^&*])[a-zA-Z0-9!@#$%^&*]{2,16}$
// 2-16 chars, requires: lowercase, uppercase, digit, special char (!@#$%^&*)
val passwordPattern = regex {
line {
lookAhead { zeroOrMore { anyChar() }; asciiLowercase() }
lookAhead { zeroOrMore { anyChar() }; asciiUppercase() }
lookAhead { zeroOrMore { anyChar() }; asciiDigit() }
lookAhead { zeroOrMore { anyChar() }; anyOf("!@#$%^&*") }
repeat(2, 16) {
anyOf {
asciiAlphanumeric()
chars("!@#$%^&*")
}
}
}
}
println(passwordPattern.matches())
println(passwordPattern.matches())
println(passwordPattern.matches())
println(passwordPattern.matches())
// ^(?<protocol>https|http)://(?<domain>[\w.-]+)(?<port>:\d+)?(?<path>[\w/.-]*)?$
val urlPattern = regex {
line {
captureAs("protocol") {
either(
{ literal("https") },
{ literal("http") }
)
}
literal("://")
captureAs("domain") {
oneOrMore { anyOf { wordChar(); chars(".-") } }
}
optional {
captureAs("port") {
literal(":")
oneOrMore { digit() }
}
}
optional {
captureAs("path") {
zeroOrMore { anyOf { wordChar(); chars("/.-") } }
}
}
}
}
match = urlPattern.find()!!
println(match.groups[]?.value)
println(match.groups[]?.value)
println(match.groups[]?.value)
// <(?<tag>\w+).*>.*?</\k<tag>>
val htmlTag = regex {
literal("<")
captureAs("tag") { oneOrMore { wordChar() } }
zeroOrMore { anyChar() }
literal(">")
zeroOrMoreLazy { anyChar() }
literal("</")
backReference("tag")
literal(">")
}
println(htmlTag.containsMatchIn("<div>content</div>")) // true
println(htmlTag.containsMatchIn("<div>content</span>")) // false
| Function | Description |
|---|---|
regex { } | Creates a Regex object from the DSL |
Use the standard Regex.pattern property to inspect the generated pattern string.
Instead of manually adding anchors, use the convenience blocks:
// Before
regex {
startOfLine()
oneOrMore { digit() }
endOfLine()
}
// After
regex {
line { oneOrMore { digit() } }
}
// Result: ^\d+$
regex {
input { oneOrMore { digit() } }
}
// Result: \A\d+\z
Note:
`digit()` uses `\d`, which may match Unicode digits depending on regex flags. Use `asciiDigit()` for strict ASCII digits (0-9).
Note: On JavaScript, the
`u` flag is automatically included by Kotlin/JS, so Unicode features work out of the box.
Use unicode { } block for combining Unicode classes:
regex {
unicode {
letter() // \p{L}
number() // \p{N}
script("Han") // \p{IsHan} (JVM) / \p{Script=Han} (JS)
}
}
// Result: [\p{L}\p{N}\p{IsHan}] (JVM) / [\p{L}\p{N}\p{Script=Han}] (JS)
Available methods inside unicode { }:
Use ascii { } block for combining ASCII ranges:
regex {
ascii {
lower() // a-z
upper() // A-Z
digit() // 0-9
}
}
// Result: [a-zA-Z0-9]
regex {
ascii { hexDigit() }
}
// Result: [0-9a-fA-F]
Available methods inside ascii { }:
- lower() - ASCII lowercase (a-z)
- upper() - ASCII uppercase (A-Z)
- digit() - ASCII digits (0-9)
- letter() - ASCII letters (a-zA-Z)
- alphanumeric() - ASCII alphanumeric (a-zA-Z0-9)

These patterns work in both RegexBuilder and CharClassBuilder contexts:
// Match Korean text
val koreanPattern = regex {
line { oneOrMore { hangulSyllable() } }
}
println(koreanPattern.matches("안녕하세요")) // true
// Combine with other characters using hangul block
val mixed = regex {
oneOrMore {
anyOf {
hangul {
syllable() // 가-힣
jamo() // ㄱ-ㅣ
}
digit()
}
}
}
println(mixed.matches("가격1000ㅋㅋ")) // true
// Generated pattern: [가-힣ㄱ-ㅣ\d]+
Use hangul { } block for combining Hangul ranges:
regex {
hangul {
syllable() // 가-힣 (완성형)
consonant() // ㄱ-ㅎ (자음)
}
}
// Result: [가-힣ㄱ-ㅎ]
Available methods inside hangul { }:
- syllable() - Complete syllables (가-힣)
- jamo() - All Jamo (ㄱ-ㅣ)
- consonant() - Consonants only (ㄱ-ㅎ)
- vowel() - Vowels only (ㅏ-ㅣ)

Note: POSIX classes are only available on JVM.
Use posix { } block for combining POSIX classes:
regex {
posix {
alnum() // \p{Alnum}
punct() // \p{Punct}
}
}
// Result: [\p{Alnum}\p{Punct}]
Available methods inside posix { }:
anyOf {
range('a', 'z')
range('A', 'Z')
chars("_-") // String
chars('!', '@') // Vararg Char
chars("abc", "123") // Multiple strings
digit()
}
// Results in: [a-zA-Z_\-!@abc123\d]
noneOf {
range('0', '9')
}
// Results in: [^0-9]
// Using shortcuts directly
anyOf { hexDigit() }
// Result: [0-9a-fA-F]
// Using ascii block inside charClass
anyOf {
ascii {
lower()
digit()
}
chars("_")
}
// Result: [a-z0-9_]
| Method | Pattern | Description |
|---|---|---|
backReference(1) |
Lazy quantifiers match as few characters as possible:
oneOrMoreLazy { anyChar() } // (?:.)+?
zeroOrMoreLazy { anyChar() } // (?:.)*?
optionalLazy { digit() } // (?:\d)??
Possessive quantifiers don't backtrack:
oneOrMorePossessive { digit() } // (?:\d)++
zeroOrMorePossessive { anyChar() } // (?:.)*+
optionalPossessive { digit() } // (?:\d)?+
Note: Possessive quantifiers are only available on JVM.
You can also specify the mode as a parameter:
oneOrMore(QuantifierMode.GREEDY) { digit() } // default
oneOrMore(QuantifierMode.LAZY) { digit() }
repeat(2, 5, QuantifierMode.LAZY) { wordChar() }
// Match digits followed by "px"
regex {
oneOrMore { digit() }
lookAhead { literal("px") }
}
// Match digits NOT preceded by "$"
regex {
negativeLookBehind { literal("$") }
oneOrMore { digit() }
}
// Using or()
regex {
literal("cat")
or()
literal("dog")
}
// Using either()
regex {
either(
{ literal("cat") },
{ literal("dog") },
{ literal("bird") }
)
}
Kregex provides pre-built patterns for common use cases. These are extension functions on RegexBuilder that you can use directly in your regex definitions.
| Method | Description | Example Match |
|---|---|---|
email() | Basic email pattern | user@example.com |
val pattern = regex {
line { email() }
}
println(pattern.matches("user@example.com")) // true
| Method | Description | Example Match |
|---|---|---|
password(...) | Configurable password validation | Password1! |
password() Parameters:
Default special characters (OWASP recommended): the space character plus !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~
Allowed characters: Only ASCII letters (a-z, A-Z), digits (0-9), and characters specified in allowedSpecialChars are allowed. Korean, emojis, etc. are rejected.
val pattern = regex {
line {
password(
minLength = 8,
maxLength = 20,
requireUppercase = true,
requireLowercase = true,
requireDigit = true,
requireSpecialChar = true
)
}
}
println(pattern.matches("Password1!")) // true
println(pattern.matches("password")) // false (requirements not met)
| Method | Description | Example Match |
|---|---|---|
| httpUrl() | HTTP/HTTPS URL | https://example.com/path |
val pattern = regex {
line { httpUrl() }
}
println(pattern.matches("https://example.com/path")) // true
val pattern = regex {
line { ipv4Strict() }
}
println(pattern.matches("192.168.1.1")) // true
println(pattern.matches("256.1.1.1")) // false (invalid octet)
| Method | Description | Example Match |
|---|
val pattern = regex {
line { isoDateTime() }
}
println(pattern.matches("2026-01-15T14:30:00Z")) // true
println(pattern.matches("2026-01-15T14:30:00+09:00")) // true
val pattern = regex {
line { hexColor() }
}
println(pattern.matches("#FF5733")) // true
println(pattern.matches("#fff")) // true
val pattern = regex {
line { decimal() }
}
println(pattern.matches("123.456")) // true
println(pattern.matches("-0.5")) // true
println(pattern.matches(".25")) // true
Use the standard Regex.pattern property to inspect the generated pattern:
val regex = regex {
line {
oneOrMore { digit() }
literal("@")
oneOrMore { wordChar() }
}
}
println("Generated pattern: ${regex.pattern}")
// Output: ^(?:\d)+\Q@\E(?:\w)+$
The library validates inputs and throws IllegalArgumentException for invalid parameters:
range('z', 'a') // Error: Range start must be <= end
repeat(-1) { digit() } // Error: Repeat count must be non-negative
captureAs("123") { } // Error: Name must start with a letter
backReference(0) // Error: Group number must be >= 1
val pattern = Regex("^(?:[a-zA-Z0-9._%-]+)@(?:[a-zA-Z0-9-]+)\\.(?:[a-zA-Z]{2,6})$")
val pattern = regex {
line {
oneOrMore {
anyOf {
asciiAlphanumeric()
chars("._%-")
}
}
literal("@")
oneOrMore {
anyOf {
asciiAlphanumeric()
chars("-")
}
}
literal(".")
repeat(2, 6) {
anyOf { asciiLetter() }
}
}
}
Copyright 2026 bedshanty
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Contributions are welcome! Please feel free to submit a Pull Request.
Inspired by similar DSL libraries in other languages and the need for more readable regex patterns in Kotlin. Assisted by AI tools for boilerplate generation and documentation.
| Feature | JVM | JS | Native |
|---|---|---|---|
| Core DSL | ✅ | ✅ | ✅ |
| Unicode Categories (\p{L}) | ✅ | ✅ | ✅ |
| Unicode Script | ✅ \p{IsHan} | ✅ \p{Script=Han} | ❌ |
| Unicode Block (\p{InBasicLatin}) | ✅ | ❌ | ❌ |
| POSIX Classes (\p{Alnum}) | ✅ | ❌ | ❌ |
| Possessive Quantifiers | ✅ | ❌ | ❌ |
regex(options) { } | Creates a Regex with options (e.g., IGNORE_CASE) |
| Method | Pattern | Description |
|---|
startOfLine() | ^ | Start of line |
endOfLine() | $ | End of line |
startOfInput() | \A | Start of input |
endOfInput() | \z | End of input |
wordBoundary() | \b | Word boundary |
nonWordBoundary() | \B | Non-word boundary |
line { } | ^...$ | Wraps with start/end of line anchors |
input { } | \A...\z | Wraps with start/end of input anchors |
| Method | Pattern | Description |
|---|
anyChar() | . | Any character (except newline) |
digit() | \d | Digit (may include unicode digits) |
asciiDigit() | [0-9] | ASCII digit only (0-9) |
nonDigit() | \D | Non-digit |
whitespace() | \s | Whitespace |
nonWhitespace() | \S | Non-whitespace |
wordChar() | \w | Word character (a-z, A-Z, 0-9, _) |
nonWordChar() | \W | Non-word character |
tab() | \t | Tab character |
newline() | \n | Newline character |
carriageReturn() | \r | Carriage return |
formFeed() | \f | Form feed |
alert() | \a | Alert/bell character |
escape() | \e | Escape character |
| Method | Pattern | Description | Platform |
|---|
unicodeProperty("L") | \p{L} | Unicode property | All |
notUnicodeProperty("L") | \P{L} | Negated Unicode property | All |
unicodeScript("Greek") | \p{IsGreek} (JVM) / \p{Script=Greek} (JS) | Unicode script | JVM, JS |
unicodeBlock("BasicLatin") | \p{InBasicLatin} | Unicode block | JVM only |
unicodeLetter() | \p{L} | Any Unicode letter | All |
unicodeUppercaseLetter() | \p{Lu} | Unicode uppercase letter | All |
unicodeLowercaseLetter() | \p{Ll} | Unicode lowercase letter | All |
unicodeNumber() | \p{N} | Any Unicode numeric character | All |
unicodePunctuation() | \p{P} | Unicode punctuation | All |
unicodeSymbol() | \p{S} | Unicode symbol | All |
- property(name) - Unicode property (\p{...})
- notProperty(name) - Negated property (\P{...})
- script(name) - Unicode script (\p{Is...}) - JVM/JS only
- block(name) - Unicode block (\p{In...}) - JVM only
- letter(), uppercaseLetter(), lowercaseLetter()
- number(), punctuation(), symbol()

| Method | Pattern | Description |
|---|
asciiLowercase() | [a-z] | ASCII lowercase letters |
asciiUppercase() | [A-Z] | ASCII uppercase letters |
asciiDigit() | [0-9] | ASCII digits |
asciiLetter() | [a-zA-Z] | ASCII letters |
asciiAlphanumeric() | [a-zA-Z0-9] | ASCII alphanumeric |
hexDigit() | [0-9a-fA-F] | Hexadecimal digits |
- hexDigit() - Hexadecimal digits (0-9a-fA-F)

| Method | Pattern | Description |
|---|
hangulSyllable() | [가-힣] | Complete Hangul syllable |
hangulJamo() | [ㄱ-ㅣ] | Hangul Jamo - consonants and vowels |
hangulConsonant() | [ㄱ-ㅎ] | Hangul consonants only |
hangulVowel() | [ㅏ-ㅣ] | Hangul vowels only |
| Method | Pattern | Description |
|---|
posixAlnum() | \p{Alnum} | Alphanumeric [a-zA-Z0-9] |
posixAlpha() | \p{Alpha} | Alphabetic [a-zA-Z] |
posixAscii() | \p{ASCII} | ASCII characters [\x00-\x7F] |
posixBlank() | \p{Blank} | Space and tab [ \t] |
posixCntrl() | \p{Cntrl} | Control characters |
posixDigit() | \p{Digit} | Digits [0-9] |
posixGraph() | \p{Graph} | Visible characters (no space) |
posixLower() | \p{Lower} | Lowercase [a-z] |
posixPrint() | \p{Print} | Printable characters |
posixPunct() | \p{Punct} | Punctuation |
posixSpace() | \p{Space} | Whitespace [ \t\n\r\f\v] |
posixUpper() | \p{Upper} | Uppercase [A-Z] |
posixXDigit() | \p{XDigit} | Hex digits [0-9a-fA-F] |
- alnum(), alpha(), ascii(), blank(), cntrl()
- digit(), graph(), lower(), print(), punct()
- space(), upper(), xdigit()

| Method | Pattern | Description |
|---|
literal("text") | (escaped) | Match text literally |
char('x') | (escaped) | Match single character |
anyOf("abc") | [abc] | Any of these characters |
anyOf("abc", "xyz") | [abcxyz] | Multiple strings combined |
anyOf('a', 'b', 'c') | [abc] | Vararg characters |
anyOf { } | [...] | Character class builder (alias for charClass) |
noneOf("abc") | [^abc] | None of these characters |
noneOf("abc", "xyz") | [^abcxyz] | Multiple strings combined (negated) |
noneOf('a', 'b', 'c') | [^abc] | Vararg characters (negated) |
noneOf { } | [^...] | Negated character class builder (alias for negatedCharClass) |
range('a', 'z') | [a-z] | Character range |
notInRange('a', 'z') | [^a-z] | Negated character range |
appendRaw("pattern") | (as-is) | Raw pattern (no escaping) |
| Method | Pattern | Description |
|---|
asciiLowercase() | a-z | ASCII lowercase letters |
asciiUppercase() | A-Z | ASCII uppercase letters |
asciiDigit() | 0-9 | ASCII digits |
asciiLetter() | a-zA-Z | All ASCII letters |
asciiAlphanumeric() | a-zA-Z0-9 | ASCII letters and digits |
hexDigit() | 0-9a-fA-F | Hexadecimal digits |
| Method | Pattern | Description |
|---|
capture { } | (...) | Capturing group |
captureAs("name") { } | (?<name>...) | Named capturing group |
group { } | (?:...) | Non-capturing group |
atomicGroup { } | (?>...) | Atomic group (no backtracking) |
backReference(1) | \1 | Reference to group 1 |
backReference("name") | \k<name> | Reference to named group |
| Method | Pattern | Description |
|---|
optional { } | (?:...)? | 0 or 1 time |
zeroOrMore { } | (?:...)* | 0 or more times |
oneOrMore { } | (?:...)+ | 1 or more times |
repeat(n) { } | (?:...){n} | Exactly n times |
repeat(min, max) { } | (?:...){min,max} | Between min and max times |
atLeast(n) { } | (?:...){n,} | At least n times |
| Method | Pattern | Description |
|---|
lookAhead { } | (?=...) | Positive lookahead |
negativeLookAhead { } | (?!...) | Negative lookahead |
lookBehind { } | (?<=...) | Positive lookbehind |
negativeLookBehind { } | (?<!...) | Negative lookbehind |
| Method | Pattern | Description |
|---|
caseInsensitive { } | (?i:...) | Case-insensitive matching |
multiline { } | (?m:...) | Multiline mode |
dotAll { } | (?s:...) | Dot matches newlines |
comments { } | (?x:...) | Comments mode |
| Parameter | Type | Default | Description |
|---|
minLength | Int | 8 | Minimum length |
maxLength | Int? | 256 | Maximum length (null = unlimited) |
requireUppercase | Boolean | false | Require uppercase letter |
requireLowercase | Boolean | false | Require lowercase letter |
requireDigit | Boolean | false | Require digit |
requireSpecialChar | Boolean | false | Require special character |
allowedSpecialChars | String | OWASP recommended set | Allowed special characters |
https://example.com/path |
httpUrlWithCapture() | URL with named captures (protocol, domain, port, path) | https://example.com:8080/api |
| Method | Description | Example Match |
|---|
ipv4() | IPv4 address (format only) | 192.168.1.1 |
ipv4Strict() | IPv4 with octet validation (0-255) | 192.168.1.1 |
ipv6() | IPv6 address (8 groups) | 2001:0db8:85a3:0000:0000:8a2e:0370:7334 |
phoneNumber() | Flexible phone format | +1-123-456-7890, (123) 456-7890 |
usPhoneNumber() | US phone format | (123) 456-7890, 123-456-7890 |
| Method | Description | Example Match |
|---|
isoDate() | ISO 8601 date | 2026-01-15 |
time() | Time (HH:MM or HH:MM:SS) | 14:30, 14:30:59 |
isoDateTime() | ISO 8601 datetime | 2026-01-15T14:30:00Z |
| Method | Description | Example Match |
|---|
uuid() | UUID format | 550e8400-e29b-41d4-a716-446655440000 |
hexColor() | Hex color (#RGB, #RRGGBB) | #fff, #FF5733 |
slug() | URL-friendly identifier | my-blog-post-123 |
semver() | Semantic version | 1.0.0, 2.1.3-alpha.1 |
| Method | Description | Example Match |
|---|
word() | One or more word characters | Hello |
integer() | Integer with optional sign | 123, -456, +789 |
decimal() | Decimal number | 123.456, -0.5, .25 |
quotedString() | Double-quoted string | "hello" |
singleQuotedString() | Single-quoted string | 'hello' |
Surfaced from shared tags and platforms — no rankings paid for.