public abstract class Encoding extends Object implements Cloneable
| Modifier and Type | Field and Description |
|---|---|
static int |
CHAR_INVALID |
protected boolean |
isUnicode |
protected boolean |
isUTF8 |
protected int |
maxLength |
protected int |
minLength |
static byte |
NEW_LINE |
| Modifier | Constructor and Description |
|---|---|
protected |
Encoding(String name,
int minLength,
int maxLength) |
| Modifier and Type | Method and Description |
|---|---|
abstract void |
applyAllCaseFold(int flag,
ApplyAllCaseFoldFunction fun,
Object arg)
Expand case folds given a character class (used for case insensitive matching)
|
static byte |
asciiToLower(int c) |
static byte |
asciiToUpper(int c) |
abstract CaseFoldCodeItem[] |
caseFoldCodesByString(int flag,
byte[] bytes,
int p,
int end)
Expand AST string nodes into their folded alternatives (look at:
Analyser.expandCaseFoldString)
Oniguruma equivalent: get_case_fold_codes_by_str |
abstract int |
codeToMbc(int code,
byte[] bytes,
int p)
Extracts code point into its multibyte representation
|
abstract int |
codeToMbcLength(int code)
Returns character length given a code point
Oniguruma equivalent:
code_to_mbclen |
abstract int[] |
ctypeCodeRange(int ctype,
IntHolder sbOut)
Returns code range for a given character type
Oniguruma equivalent:
get_ctype_code_range |
static int |
digitVal(int code) |
boolean |
equals(Object other) |
Charset |
getCharset()
If this encoding is capable of being represented by a Java Charset
then provide it.
|
String |
getCharsetName() |
int |
getIndex() |
byte[] |
getName() |
int |
hashCode() |
boolean |
isAlnum(int code) |
boolean |
isAlpha(int code) |
static boolean |
isAscii(byte b) |
static boolean |
isAscii(int code) |
boolean |
isAsciiCompatible() |
boolean |
isBlank(int code) |
boolean |
isCntrl(int code) |
abstract boolean |
isCodeCType(int code,
int ctype)
Performs a check whether the given code is of the given character type.
|
boolean |
isDigit(int code) |
boolean |
isDummy() |
boolean |
isFixedWidth() |
boolean |
isGraph(int code) |
boolean |
isLower(int code) |
static boolean |
isMbcAscii(byte b) |
boolean |
isMbcCrnl(byte[] bytes,
int p,
int end) |
boolean |
isMbcHead(byte[] bytes,
int p,
int end) |
boolean |
isMbcWord(byte[] bytes,
int p,
int end) |
abstract boolean |
isNewLine(byte[] bytes,
int p,
int end)
Returns true if
bytes[p] is a head of a new line character
Oniguruma equivalent: is_mbc_newline |
boolean |
isNewLine(int code) |
boolean |
isPrint(int code) |
boolean |
isPunct(int code) |
abstract boolean |
isReverseMatchAllowed(byte[] bytes,
int p,
int end)
Returns true if it's safe to use reversal Boyer-Moore search fail fast algorithm
Oniguruma equivalent:
is_allowed_reverse_match |
boolean |
isSbWord(int code) |
boolean |
isSingleByte() |
boolean |
isSpace(int code) |
boolean |
isUnicode() |
boolean |
isUpper(int code) |
boolean |
isUTF8() |
boolean |
isWord(int code) |
static boolean |
isWordGraphPrint(int ctype) |
boolean |
isXDigit(int code) |
abstract int |
leftAdjustCharHead(byte[] bytes,
int p,
int s,
int end)
Seeks the previous character head in a stream
Oniguruma equivalent:
left_adjust_char_head |
abstract int |
length(byte c)
Returns character length given character head
returns
1 for singlebyte encodings or performs direct length table lookup for multibyte ones. |
abstract int |
length(byte[] bytes,
int p,
int end)
Returns character length given stream, character position and stream end
returns
1 for singlebyte encodings or performs sanity validations for multibyte ones
and returns the character length, missing characters in the stream otherwise |
static Encoding |
load(String name) |
int |
maxLength()
Returns maximum character byte length that can appear in an encoding
Oniguruma equivalent:
max_enc_len |
int |
maxLengthDistance() |
abstract int |
mbcCaseFold(int flag,
byte[] bytes,
IntHolder pp,
int end,
byte[] to)
Performs case folding for a character at
bytes[pp.value] |
int |
mbcodeStartPosition() |
abstract int |
mbcToCode(byte[] bytes,
int p,
int end)
Returns code point for a character
Oniguruma equivalent:
mbc_to_code |
int |
minLength()
Returns minimum character byte length that can appear in an encoding
Oniguruma equivalent:
min_enc_len |
static int |
odigitVal(int code) |
int |
prevCharHead(byte[] bytes,
int p,
int s,
int end) |
abstract int |
propertyNameToCType(byte[] bytes,
int p,
int end)
Returns character type given character type name.
|
int |
rightAdjustCharHead(byte[] bytes,
int p,
int s,
int end) |
int |
rightAdjustCharHeadWithPrev(byte[] bytes,
int p,
int s,
int end,
IntHolder prev) |
protected void |
setDummy() |
protected void |
setName(byte[] name) |
protected void |
setName(String name) |
int |
step(byte[] bytes,
int p,
int end,
int n) |
int |
stepBack(byte[] bytes,
int p,
int s,
int end,
int n) |
int |
strByteLengthNull(byte[] bytes,
int p,
int end) |
abstract int |
strCodeAt(byte[] bytes,
int p,
int end,
int index) |
abstract int |
strLength(byte[] bytes,
int p,
int end) |
int |
strLengthNull(byte[] bytes,
int p,
int end) |
int |
strNCmp(byte[] bytes,
int p,
int end,
byte[] ascii,
int asciiP,
int n) |
byte[] |
toLowerCaseTable()
Returns lower case table if it's safe to use it directly, otherwise
null
Used for fast case insensitive matching for some singlebyte encodings |
String |
toString() |
int |
xdigitVal(int code) |
public static final int CHAR_INVALID
protected final int minLength
protected final int maxLength
protected boolean isUnicode
protected boolean isUTF8
public static final byte NEW_LINE
protected Encoding(String name, int minLength, int maxLength)
protected final void setName(String name)
protected final void setName(byte[] name)
protected final void setDummy()
public final int getIndex()
public final byte[] getName()
public final boolean isDummy()
public final boolean isAsciiCompatible()
public final boolean isUnicode()
public final boolean isUTF8()
public Charset getCharset()
public String getCharsetName()
public abstract int length(byte c)
Returns character length given character head; returns 1 for singlebyte encodings or performs direct length table lookup for multibyte ones.
c - Character head
Oniguruma equivalent: mbc_enc_len
To be deprecated very soon (use length(byte[] bytes, int p, int end) version)
public abstract int length(byte[] bytes,
int p,
int end)
Returns character length given stream, character position and stream end; returns 1 for singlebyte encodings or performs sanity validations for multibyte ones
and returns the character length, missing characters in the stream otherwise
Oniguruma equivalent: mbc_enc_len (modified for 1.9 purposes)
public final int maxLength()
Returns maximum character byte length that can appear in an encoding
Oniguruma equivalent: max_enc_len
public final int maxLengthDistance()
public final int minLength()
Returns minimum character byte length that can appear in an encoding
Oniguruma equivalent: min_enc_len
public abstract boolean isNewLine(byte[] bytes,
int p,
int end)
Returns true if bytes[p] is a head of a new line character
Oniguruma equivalent: is_mbc_newline
public abstract int mbcToCode(byte[] bytes,
int p,
int end)
Returns code point for a character
Oniguruma equivalent: mbc_to_code
public abstract int codeToMbcLength(int code)
Returns character length given a code point
Oniguruma equivalent: code_to_mbclen
public abstract int codeToMbc(int code,
byte[] bytes,
int p)
Extracts code point into its multibyte representation
Oniguruma equivalent: code_to_mbc
public abstract int mbcCaseFold(int flag,
byte[] bytes,
IntHolder pp,
int end,
byte[] to)
Performs case folding for a character at bytes[pp.value]
flag - case fold flag
pp - an IntHolder that points at character head
to - a buffer where to extract case folded character
Oniguruma equivalent: mbc_case_fold
public byte[] toLowerCaseTable()
Returns lower case table if it's safe to use it directly, otherwise null
Used for fast case insensitive matching for some singlebyte encodings
public abstract void applyAllCaseFold(int flag,
ApplyAllCaseFoldFunction fun,
Object arg)
Expand case folds given a character class (used for case insensitive matching)
flag - case fold flag
fun - case folding functor (look at: ApplyCaseFold)
arg - case folding functor argument (look at: ApplyCaseFoldArg)
Oniguruma equivalent: apply_all_case_fold
public abstract CaseFoldCodeItem[] caseFoldCodesByString(int flag, byte[] bytes, int p, int end)
Expand AST string nodes into their folded alternatives (look at: Analyser.expandCaseFoldString)
Oniguruma equivalent: get_case_fold_codes_by_str
public abstract int propertyNameToCType(byte[] bytes,
int p,
int end)
Returns character type given character type name
Oniguruma equivalent: property_name_to_ctype
public abstract boolean isCodeCType(int code,
int ctype)
Performs a check whether the given code is of the given character type
code - a code point of a character
ctype - a character type to check against
Oniguruma equivalent: is_code_ctype
public abstract int[] ctypeCodeRange(int ctype,
IntHolder sbOut)
Returns code range for a given character type
Oniguruma equivalent: get_ctype_code_range
public abstract int leftAdjustCharHead(byte[] bytes,
int p,
int s,
int end)
Seeks the previous character head in a stream
Oniguruma equivalent: left_adjust_char_head
bytes - byte stream
p - position
s - stop
end - end
public abstract boolean isReverseMatchAllowed(byte[] bytes,
int p,
int end)
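Returns true if it's safe to use reversal Boyer-Moore search fail fast algorithm
Oniguruma equivalent: is_allowed_reverse_match
public final int rightAdjustCharHead(byte[] bytes,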
int p,
int s,
int end)
public final int rightAdjustCharHeadWithPrev(byte[] bytes,
int p,
int s,
int end,
IntHolder prev)
public final int prevCharHead(byte[] bytes,
int p,
int s,
int end)
public final int stepBack(byte[] bytes,
int p,
int s,
int end,
int n)
public final int step(byte[] bytes,
int p,
int end,
int n)
public abstract int strLength(byte[] bytes,
int p,
int end)
public abstract int strCodeAt(byte[] bytes,
int p,
int end,
int index)
public final int strLengthNull(byte[] bytes,
int p,
int end)
public final int strByteLengthNull(byte[] bytes,
int p,
int end)
public final int strNCmp(byte[] bytes,
int p,
int end,
byte[] ascii,
int asciiP,
int n)
public final boolean isNewLine(int code)
public final boolean isGraph(int code)
public final boolean isPrint(int code)
public final boolean isAlnum(int code)
public final boolean isAlpha(int code)
public final boolean isLower(int code)
public final boolean isUpper(int code)
public final boolean isCntrl(int code)
public final boolean isPunct(int code)
public final boolean isSpace(int code)
public final boolean isBlank(int code)
public final boolean isDigit(int code)
public final boolean isXDigit(int code)
public final boolean isWord(int code)
public final boolean isMbcWord(byte[] bytes,
int p,
int end)
public final boolean isSbWord(int code)
public final boolean isMbcHead(byte[] bytes,
int p,
int end)
public boolean isMbcCrnl(byte[] bytes,
int p,
int end)
public static int digitVal(int code)
public static int odigitVal(int code)
public final int xdigitVal(int code)
public static boolean isMbcAscii(byte b)
public static boolean isAscii(int code)
public static boolean isAscii(byte b)
public static byte asciiToLower(int c)
public static byte asciiToUpper(int c)
public static boolean isWordGraphPrint(int ctype)
public final int mbcodeStartPosition()
public final boolean isSingleByte()
public final boolean isFixedWidth()
Copyright © 2016. All Rights Reserved.