Related
How can I decode the UTF-8 codepoints of a MappedByteBuffer in Java without copying the buffer? Copying the buffer into memory would defeat the point of mapping the memory, and since CharsetDecoder would require me to copy the buffer with the toCharArray method, it would defeat the purpose of mapping the memory. Is there any way to efficiently decode it without copying the buffer?
Here is a somewhat more complete answer.
It reads the Input-File in Chunks & stores the result in a CharBuffer.
import static java.nio.charset.StandardCharsets.UTF_8;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;
import java.nio.charset.MalformedInputException;
import java.nio.charset.UnmappableCharacterException;
import java.nio.file.Files;
import java.nio.file.Path;
public class ChunkedUtf8Decoder {
/*
* Valid UTF-8 Multi-Bytes
* -----------------------
* 1-Byte : 0xxxxxxx
* 2-Byte : 110xxxxx + 10xxxxxx
* 3-Byte : 1110xxxx + 10xxxxxx + 10xxxxxx
* 4-Byte : 11110xxx + 10xxxxxx + 10xxxxxx + 10xxxxxx
*/
private static final int UTF8_CONTINUE_MASK = 0b11_000000;
private static final int UTF8_INVALID_MASK = 0b11111_000;
private static final int UTF8_CONTINUE_PREFIX = 0b10_000000;
private static final int UTF8_INVALID_PREFIX = UTF8_INVALID_MASK;
private static final int CHUNK_SIZE = 24; // TODO Test value! Try something larger, e.g. 64_000
private final CharsetDecoder utf8Decoder = UTF_8.newDecoder();
private final ByteBuffer bb = ByteBuffer.allocate(CHUNK_SIZE); // TODO maybe allocateDirect?
private final CharBuffer cb;
private final Path inputPath;
private byte[] remainder = {};
private int bytesChunked = 0;
public ChunkedUtf8Decoder(final Path inputPath) throws IOException {
this.inputPath = inputPath;
this.cb = CharBuffer.allocate(Math.toIntExact(Files.size(inputPath)));
// this.utf8Decoder.onMalformedInput (CodingErrorAction.REPLACE); // (TODO Default is REPORT)
// this.utf8Decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); // (TODO Default is REPORT)
}
/**
* Split the Input-File into Chunks & Decode them, appending the result to our CharBuffer.
*
* #throws IOException
*/
public void decode() throws IOException {
try(final FileChannel channel = FileChannel.open(inputPath))
{
while (channel.read(bb) != -1) {
debugBytesIn("Read......:", false);
this.remainder = backupToCharBoundary();
debugBytesIn("Backed up.:", true);
this.bytesChunked += decodeChunk();
bb.clear(); // (position=0, limit=capacity, mark=-1)
bb.put(this.remainder); // (Final Remainder is dealt with below)
}
debugBytesIn("EOF.......:", false);
/*
* Lastly, deal with Final Remainder (0 to 4 Bytes) # start of Buffer...
*/
decodeChunk();
}
}
/**
* We try to back up the BB to a Character boundary.
* If the file is correctly encoded, the shorter BB will then decode OK.
* The Remainder will be processed after the NEXT Read operation.
*
* #return
* #throws MalformedInputException
*/
private byte[] backupToCharBoundary() throws MalformedInputException {
for (int i = 1; i <= 4; i++) {
final int trailingBytePos = bb.position() - i;
if (trailingBytePos < 0) {
/*
* If there were too few Bytes available, carry them over until either
* a) more Bytes become available, or
* b) EOF is reached
*/
final byte[] remainder = new byte[bb.position()];
bb.get (0, remainder);
bb.clear(); // (position=0, limit=capacity, mark=-1)
return remainder; // (Entire contents of BB in Remainder, BB Empty)
}
final int trailingByte = 0xFF & bb.get(trailingBytePos);
/*
* We stop as soon as we encounter a Character-Selector...
* (The following 2 intervals are either invalid or a continuation-character)
*/
if ((trailingByte & UTF8_INVALID_MASK ) != UTF8_INVALID_PREFIX // 11111xxx
&& (trailingByte & UTF8_CONTINUE_MASK) != UTF8_CONTINUE_PREFIX) { // 10xxxxxx
/*
* OK, we have found a (1-, 2-, 3- or 4-Byte) Character-Selector...
*/
final byte[] remainder = new byte[i];
bb.get (trailingBytePos, remainder);
bb.position(trailingBytePos);
return remainder; // (Trailing 1-4 Bytes of BB in Remainder)
}
}
/*
* Although at least 4 Bytes are available, We couldn't find a Character-Selector!
* This is an error.
* We leave the ByteBuffer unchanged & return an empty Remainder.
* The CharsetDecoder will produce a MalformedInputException in due course...
*/
return new byte[0];
}
/**
* Decode the current Chunk of Bytes & append the result to our CB.
*
* #return
* #throws CharacterCodingException
*/
private int decodeChunk() throws CharacterCodingException {
try {
this.bb.flip(); // (limit=position, position=0, mark=-1)
; this.utf8Decoder.reset();
final CoderResult res = this.utf8Decoder.decode(this.bb, this.cb, true);
System.out.println("CB Chars..: " + this.cb.position());
/*
* Why on earth is UNDERFLOW the correct result of an error-free Decode?!
*/
if (res.isUnderflow() == false) {
res.throwException();
}
return this.bb.position();
}
catch (final MalformedInputException e) {
throw new MalformedInputException (this.bytesChunked + e.getInputLength());
}
catch (final UnmappableCharacterException e) {
throw new UnmappableCharacterException(this.bytesChunked + e.getInputLength());
/*
* (Famous Last Words "UnmappableCharacterException never occurs with UTF-8 Decoder")
*/
}
catch (final CharacterCodingException inputUnderflowOrOutputOverflow) {
throw inputUnderflowOrOutputOverflow;
/*
* (Underflow if the final Character of the final Chunk was incomplete)
* (Overflow if the Output Buffer is too small, which SHOULD NOT HAPPEN with our big CB)
*/
}
}
/**
* Some Debug Writes.
*
* #param debugText
* #param includeRemainder
*/
private void debugBytesIn(final String debugText, final boolean includeRemainder) {
System .out.print(debugText + " previous=" + this.bytesChunked + " bbPos=" + bb.position() + " Bytes.:");
for ( int p = 0; p < bb.position(); p++) {
System .out.print(" " + Integer.toHexString(0xFF & bb.get(p)));
}
if (includeRemainder) {
System .out.print(" Remainder.:");
for (int r = 0; r < this.remainder.length; r++) {
System.out.print(" " + Integer.toHexString(0xFF & this.remainder[r]));
}
}
System .out.println();
}
public static void main(final String[] args) throws IOException {
final ChunkedUtf8Decoder utf8Decoder = new ChunkedUtf8Decoder(Path.of("Utf8-Chars_nn.txt"));
; utf8Decoder.decode();
System.out.println (utf8Decoder.cb.flip()); // TODO Note.: flip()
}
}
Not really: essentially a char[] needs to be built from the byte[] (either direct or indirect) which is backing the MappedByteBuffer.
So something like the following is necessary:
private static CharBuffer readUTF8(final Path path) throws IOException {
final long byteCount = path.toFile().length();
final FileChannel channel = FileChannel.open(PATH);
final MappedByteBuffer byteBuffer = channel.map(MapMode.READ_ONLY, 0, byteCount);
return StandardCharsets.UTF_8.decode(byteBuffer);
}
The following snippet demonstrates the possibility to reuse various components used in the above example.
As stated in the Javadoc, this is a complex matter.
So it should only be considered as a collection of ideas & invocations of various JDK methods, which may or may not be of use, depending on your concrete requirements.
You really need an in-depth understanding of Charsets etc. before using the following...
/**
* It is possible to pre-allocate a CharsetDecoder & CharBuffer & use those multiple times.
* Be aware that this would make your logic MUCH more complicated.
* a) you'll have to control use/reuse & initialisation of the CharsetDecoder
* b) you'll need to ensure the CharBuffer has sufficient capacity
*
* The following is just snippets which may be of use.
*/
public static void main(final String[] args) throws Exception {
final CharsetDecoder utf8Decoder = StandardCharsets.UTF_8.newDecoder();
; utf8Decoder.onMalformedInput (CodingErrorAction.REPLACE);
; utf8Decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
final int charBufLength = 0; // TODO how long?
final CharBuffer charBuf = CharBuffer.allocate(charBufLength);
final int byteBufferLength = 0; // TODO how long?
final MappedByteBuffer byteBuffer = FileChannel.open(Path.of("my File")).map(MapMode.READ_ONLY, 0, byteBufferLength);
readUTF8(utf8Decoder, byteBuffer, charBuf);
}
private static void readUTF8(final CharsetDecoder utf8Decoder, final MappedByteBuffer byteBuffer, final CharBuffer charBuffer) throws IOException {
/* Maybe */ utf8Decoder.reset(); // TODO check out the source of the following decode(byteBuffer) method
/* Maybe */ utf8Decoder.decode(byteBuffer); // TODO check out the source of this method
utf8Decoder.decode(byteBuffer, charBuffer, true); // reuses utf8Decoder & charBuffer
charBuffer.flip();
}
I'm build a blockchain app.
When I run tests in main, no matter what I do, no matter how much time I give it, when I log different things out, I'm unable to get 4 leading zeroes and so complete a difficulty level of 4. I see the log of the binary hashes and many times they have repeating elements, 1111 for instance, but never 0000 until my time is hit and the difficulty decreases to three. I have no idea why.
I borrowed the hash algorithm from an online source and I checked its output against an online hasher and it checked out.
I know with each level of difficulty it increases exponentially but 2^4 is still only 16 and I see other repeating numbers (1111, 1010, any combination except 0000). Is there any reason why this might be the case?
I wanted to provide an abundance of code rather than a shortage. Logically it makes no sense why randomly if all numbers were equally possible, it woudln't turn up 0000* (e.g. 0000101011at some point). therefore Four zeros must not be possible, but why? I waited 100 seconds mutliple times and saw other numbers repeat themselves. I saw it hit at exactly 4 or 3 or 2 seconds each time on the dot when difficulty went to three. When I start at difficulty 5 (genesis block) it will never solve- I'm sure even if I left it running overnight. So what could be going on?
package privblock.gerald.ryan;
import java.nio.charset.StandardCharsets;
import java.security.NoSuchAlgorithmException;
import java.time.Instant;
import java.util.Arrays;
import java.util.Date; // gets time in ms.
import privblock.gerald.ryan.util.CryptoHash;
/**
*
* #author Gerald Ryan Block Class of blockchain app
*
* Description: The block hash is the result of the timestamp, the
* last_hash, the data, the difficulty and the nonce
*
*/
public class Block {
long timestamp;
String lastHash;
String hash;
String[] data;
int difficulty;
int nonce;
// Millisecond basis
;
static long MILLISECONDS = 1;
static long SECONDS = 1000 * MILLISECONDS;
static long MINE_RATE = 2 * SECONDS;
/**
* A block is a unit of storage for a blockchain that supports a cryptocurrency.
*
* #param timestamp
* #param lastHash
* #param hash
* #param data
* #param difficulty
* #param nonce
*/
public Block(long timestamp, String lastHash, String hash, String[] data, int difficulty, int nonce) {
super();
this.timestamp = timestamp;
this.lastHash = lastHash;
this.hash = hash;
this.data = data;
this.difficulty = difficulty;
this.nonce = nonce;
}
public String toString() {
return "\n-----------BLOCK--------\ntimestamp: " + this.timestamp + "\nlastHash: " + this.lastHash + "\nhash: "
+ this.hash + "\ndifficulty: " + this.getDifficulty() + "\nNonce: " + this.nonce
+ "\n-----------------------\n";
}
/**
* Mine a block based on given last block and data until a block hash is found
* that meets the leading 0's Proof of Work requirement.
*
* #param last_block
* #param data
* #return
* #throws NoSuchAlgorithmException
*/
public static Block mine_block(Block last_block, String[] data) throws NoSuchAlgorithmException {
long timestamp = new Date().getTime();
String last_hash = last_block.getHash();
int difficulty = Block.adjust_difficulty(last_block, timestamp);
int nonce = 0;
String hash = CryptoHash.getSHA256(timestamp, last_block.getHash(), data, difficulty, nonce);
String proof_of_work = CryptoHash.n_len_string('0', difficulty);
// System.out.println("Proof of work " + proof_of_work);
String binary_hash = CryptoHash.hex_to_binary(hash);
// System.out.println("binary hash " + binary_hash);
String binary_hash_work_end = binary_hash.substring(0, difficulty);
// System.out.println("binary_Hash_work_end " + binary_hash_work_end);
System.out.println("Difficulty: " + difficulty);
while (!proof_of_work.equalsIgnoreCase(binary_hash_work_end)) {
// System.out.println("Working");
nonce += 1;
timestamp = new Date().getTime();
difficulty = Block.adjust_difficulty(last_block, timestamp);
hash = CryptoHash.getSHA256(timestamp, last_block.getHash(), data, difficulty, nonce);
proof_of_work = CryptoHash.n_len_string('0', difficulty);
binary_hash = CryptoHash.hex_to_binary(hash);
binary_hash_work_end = binary_hash.substring(0, difficulty);
// System.out.println(binary_hash_work_end);
// System.out.println(binary_hash);
// System.out.println(proof_of_work);
}
System.out.println("Solved at Difficulty: " + difficulty);
// System.out.println("Proof of work requirement " + proof_of_work);
// System.out.println("binary_Hash_work_end " + binary_hash_work_end);
// System.out.println("binary hash " + binary_hash);
System.out.println("BLOCK MINED");
return new Block(timestamp, last_hash, hash, data, difficulty, nonce);
}
/**
* Generate Genesis block
*
* #return
*/
public static Block genesis_block() {
long timestamp = 1;
String last_hash = "genesis_last_hash";
String hash = "genesis_hash";
String[] data = { "buy", "privcoin" };
int difficulty = 4;
int nonce = 0;
return new Block(timestamp, last_hash, hash, data, difficulty, nonce);
}
/**
* Calculate the adjusted difficulty according to the MINE_RATE. Increase the
* difficulty for quickly mined blocks. Decrease the difficulty for slowly mined
* blocks.
*
* #param last_block
* #param new_timestamp
*/
public static int adjust_difficulty(Block last_block, long new_timestamp) {
long time_diff = new_timestamp - last_block.getTimestamp();
// System.out.println(time_diff);
if (time_diff < MINE_RATE) {
// System.out.println("Increasing difficulty");
return last_block.getDifficulty() + 1;
} else if (last_block.getDifficulty() - 1 > 0) {
// System.out.println("Decreasing difficulty");
return last_block.getDifficulty() - 1;
} else {
return 1;
}
}
/**
* Validate block by enforcing following rules: - Block must have the proper
* last_hash reference - Block must meet the proof of work requirements -
* difficulty must only adjust by one - block hash must be a valid combination
* of block fields
*
* #param last_block
* #param block
* #return
* #throws NoSuchAlgorithmException
*/
public static boolean is_valid_block(Block last_block, Block block) throws NoSuchAlgorithmException {
String binary_hash = CryptoHash.hex_to_binary(block.getHash());
char[] pow_array = CryptoHash.n_len_array('0', block.getDifficulty());
char[] binary_char_array = CryptoHash.string_to_charray(binary_hash);
if (!block.getLastHash().equalsIgnoreCase(last_block.getHash())) {
System.out.println("The last hash must be correct");
return false;
// Throw exception the last hash must be correct
}
if (!Arrays.equals(pow_array, Arrays.copyOfRange(binary_char_array, 0, block.getDifficulty()))) {
System.out.println("Proof of work requirement not met");
return false;
// throw exception - proof of work requirement not met
}
if (Math.abs(last_block.difficulty - block.difficulty) > 1) {
System.out.println("Block difficulty must adjust by one");
return false;
// throw exception: The block difficulty must only adjust by 1
}
String reconstructed_hash = CryptoHash.getSHA256(block.getTimestamp(), block.getLastHash(), block.getData(),
block.getDifficulty(), block.getNonce());
if (!block.getHash().equalsIgnoreCase(reconstructed_hash)) {
System.out.println("The block hash must be correct");
System.out.println(block.getHash());
System.out.println(reconstructed_hash);
return false;
// throw exception: the block hash must be correct
}
System.out.println("You have mined a valid block");
return true;
}
public int getDifficulty() {
return difficulty;
}
public long getTimestamp() {
return timestamp;
}
public String getHash() {
return hash;
}
public String getLastHash() {
return lastHash;
}
public String[] getData() {
return data;
}
public int getNonce() {
return nonce;
}
public static void main(String[] args) throws NoSuchAlgorithmException {
// String md = CryptoHash.getSHA256("foobar");
Block genesis = genesis_block();
System.out.println(genesis.toString());
// Block bad_block = Block.mine_block(genesis, new String[] { "watch", "AOT" });
// bad_block.lastHash = "evil data";
// System.out.println(bad_block.toString());
Block good_block = mine_block(genesis, new String[] { "foo", "bar" });
System.out.println(good_block.toString());
// System.out.println(mine_block(new_block, new String[] { "crypto", "is", "fun" }).toString());
// System.out.println(Block.is_valid_block(genesis, bad_block)); // returns false as expected
System.out.println(Block.is_valid_block(genesis, good_block));
System.out.println(CryptoHash.hex_to_binary(good_block.getHash()));
Block good_block2 = mine_block(good_block, new String[] { "bar", "foo" });
Block good_block3 = mine_block(good_block2, new String[] { "bar", "foo" });
Block good_block4 = mine_block(good_block3, new String[] { "bar", "foo" });
// Block good_block5 = mine_block(good_block4, new String[] {"bar", "foo"});
// Block good_block6 = mine_block(good_block5, new String[] {"bar", "foo"});
}
}
package privblock.gerald.ryan.util;
import java.math.BigInteger;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.HashMap;
public class CryptoHash {
static HashMap<Character, String> HEX_TO_BIN_TABLE;
static {
HEX_TO_BIN_TABLE = new HashMap<Character, String>();
HEX_TO_BIN_TABLE.put('0', "0000");
HEX_TO_BIN_TABLE.put('1', "0001");
HEX_TO_BIN_TABLE.put('2', "0010");
HEX_TO_BIN_TABLE.put('3', "0011");
HEX_TO_BIN_TABLE.put('4', "0100");
HEX_TO_BIN_TABLE.put('5', "0101");
HEX_TO_BIN_TABLE.put('6', "0110");
HEX_TO_BIN_TABLE.put('7', "0111");
HEX_TO_BIN_TABLE.put('8', "1000");
HEX_TO_BIN_TABLE.put('9', "1001");
HEX_TO_BIN_TABLE.put('a', "1010");
HEX_TO_BIN_TABLE.put('b', "1011");
HEX_TO_BIN_TABLE.put('c', "1100");
HEX_TO_BIN_TABLE.put('d', "1101");
HEX_TO_BIN_TABLE.put('e', "1110");
HEX_TO_BIN_TABLE.put('f', "1111");
}
public static String getSHA256(String... sarray) throws NoSuchAlgorithmException {
String s = concat(sarray);
// System.out.printf("Hashing \"%s\"\n", s);
MessageDigest md;
md = MessageDigest.getInstance("SHA-256");
byte[] b = md.digest(s.getBytes(StandardCharsets.UTF_8));
BigInteger number = new BigInteger(1, b);
StringBuilder hexString = new StringBuilder(number.toString(16));
while (hexString.length() < 32) {
hexString.insert(0, '0');
}
String mds = hexString.toString();
// System.out.printf("hash is:\n%s\n", mds);
return hexString.toString();
}
public static String getSHA256(long timestamp, String last_hash, String[] data, int difficulty, int nonce)
throws NoSuchAlgorithmException {
String s = "";
s += Long.toString(timestamp);
s += last_hash;
s += concat(data);
s += Integer.toString(difficulty);
s += Integer.toString(nonce);
// System.out.printf("Hashing \"%s\"\n", s);
MessageDigest md;
md = MessageDigest.getInstance("SHA-256");
byte[] b = md.digest(s.getBytes(StandardCharsets.UTF_8));
BigInteger number = new BigInteger(1, b);
StringBuilder hexString = new StringBuilder(number.toString(16));
// System.out.println(hexString);
while (hexString.length() < 32) {
hexString.insert(0, '0');
}
String messageDigestString = hexString.toString();
// System.out.printf("hash is:\n%s\n", messageDigestString);
return hexString.toString();
}
public static char[] n_len_array(char c, int n) {
char[] ch = new char[n];
for (int i = 0; i<n; i++) {
ch[i] = c;
}
return ch;
}
public static String n_len_string(char c, int n) {
String s = "";
for (int i = 0; i<n; i++) {
s += c;
}
return s;
}
public static String concat(String... args) {
String s = "";
for (String $ : args) {
s += $;
}
// System.out.println(s);
return s;
}
public static char[] string_to_charray(String str) {
char[] ch = new char[str.length()];
for (int i = 0; i < str.length(); i++) {
ch[i] = str.charAt(i);
}
return ch;
}
public static String string_to_hex(String arg) {
return String.format("%064x", new BigInteger(1, arg.getBytes(StandardCharsets.UTF_8)));
}
public static String hex_to_binary(String hex_string) {
String binary_string = "";
for (int i = 0; i < hex_string.length(); i++) {
binary_string += HEX_TO_BIN_TABLE.get(hex_string.charAt(i));
}
return binary_string;
}
public static String string_to_binary(String raw_string) {
String hex_string = string_to_hex(raw_string);
String bin_string = hex_to_binary(hex_string);
return bin_string;
}
}
ps here's an example of a log I created. I created other cleaner logs too but this shows what we're working with. The first item represents time in milliseconds. The second represents the first four digits of the hash, which is directly below it, followed by the level of difficulty requirement string (what the second item needs to be, length n = difficulty level). The hash just never leads with four zeros, ever, so my hash function or call to the function must be broken in some way.
6479
1000
1000001010111011100110111010100100111010101001111110010101011101101101110000110100110110110000001010001000000010110001100111100111010100110001001001110111011010011100110000011111110100000100000100000010100001000110000111000101100010001111011000110011111101
0000
6479
0101
0101110111010100101010100000001011100011000001110001011011001101001111101011010011000111101101111111001001001010100110101101100111111011001011100101111000011100010001000000000011000111010000101101001000001010101010111001010000101001110011111101011011011000
0000
6479
1000
1000000001000101001110001110110000110111001101100001011000111010111110001011011010011111111101011001110011001001111011011110110010101010101100011011001001110001100010010101001011100001101011011101010000000100111100011011110100000101100111010100100110011101
0000
6479
I figured out the problem. It is indeed often returning 4 leading zeroes but the code as structured is clipping them off (because it doesn't think they have meaning). I noticed by logging that the length is not always a fixed 64byte/256 bit string. Here's the output:
256
1101111000010000100001110001010001010000001010111001100011010011110010001001010001010010100110111000110010000010001110110100100101000000001111111110011100000001010100000111001000111101010001010100110100000000111000100001000000010010010111011110110011110111
256
011001111101001000011111011001111110010110000011001011111010001011010110010100001011010011010010111101100010010111000010110010110111110001010101100000000101001000111110100111011100001110010010101011011000000101100001101110101101010001110000111111110000
252
0001100101110011101000000011000101011100111101110100111110100101110110011100010110001011000110010011110110011001100111010001100100011001011000001011100011011011011011101000111000011100100011011011011000101010011101000110101011000110011100111010000011000011
256
1100110001001001110001100111100010101100100010110111100111001010011011111111100010100110110000010000101000010111111010010101110001100010101010111111111111001011010111010100001010000010111100100100111000010101011000110000100000100111010001000011000000010000
256
So that's solved, or at least I understand the problem. It's amazing what sleep will do.
The assignment consists in decompress a string. In particular, the code has to work for 3 samples as illustrated in the picture.
My code here works in the first 2 of the samples. However, I am not able to come up with the 3rd sample. Probably I did not understand probably the concept of recursion. Can you help me?
import java.util.Scanner;
public class Compression4 {
public static void main(String[] args)
{
Scanner in = new Scanner(System.in);
String input=in.next();
System.out.println(uncompress(input));
}
public static boolean flag = true;
public static String uncompress(String compressedText)
{
return uncompress(compressedText, "", "");
}
public static String getMultiple(String x, int N) {
if (N == 0) return "";
return ""+x+getMultiple(x,N-1);
}
public static String uncompress(String text, String count, String output)
{
if (text.equals(""))
{
return output;
}
if(text.charAt(0) == '(')
{
int FirstIndex = text.indexOf("(")+1;
String inner = text.substring(FirstIndex, text.lastIndexOf(")"));
//System.out.println(inner);
flag = false;
return uncompress (inner, count, output);
}
else if (Character.isLetter(text.charAt(0)))
{
//letter case - need to take the count we have accrued, parse it into an integer and add to output
if (flag==true)
{
//System.out.println(count);// * text.charAt(0);
String s = String.valueOf(text.charAt(0));
output += getMultiple(s,Integer.parseInt(count));
count ="1";
}
else
{
//System.out.println(count);// * text.charAt(0);
output += getMultiple(text,Integer.parseInt(count));
//System.out.println("output: "+output);
count="0";
}
}
else if(Character.isDigit(text.charAt(0)))
{
//digit case - need to add to the count but keep as a string because must be parsed later
if(flag)
count += (""+text.charAt(0));
else
{
count = "0";
count += (""+text.charAt(0));
}
}
//parse the *remainder* of the string, one character at a time, so pass in the substring(1)
return uncompress(text.substring(1), count, output);
}
}
Sorry for the long code but it's more easy to explain with code than with words.
Premise:
I think to the problem as an interpreter of a language to render a string
the language is simple and functional so recursive interpretation is possible
Algorithm phases:
First: tokenize the expression (to work at an higher level of abstraction)
Second: parse the expression just tokenized
Recursion: the logic is based on the syntax of the language. Key concepts of a recursion:
the base cases and the recursive cases
the state necessary to a single recursion (local variables of recursion, those passed as parameters to the recursive method)
the state for the all recursion (global variables of recursion, those read/write in some specific recursion)
I've made many comments to explain what the algorithm is doing. If it's not clear I can explain it better.
import java.util.ArrayList;
import java.util.List;
public class TestStringDecompression {
// simpleExpr examples: a | b | 123a | 123b | 123(a) | 123(ab) | 123(ba) | (ab) | (ba)
// 11ab = aaaaaaaaaaab = = expression = simpleExpr simpleExpr = 11a b
// 4(ab) = abababab = expression = simpleExpr = 4(ab)
// 2(3b3(ab)) = bbbabababbbbababab = expression = compositeExpr = 2 ( simpleExpr simpleExpr ) = 2 ( 3b 3(ab) )
public static void main(String[] args) {
System.out.println(new StringInflater().inflate("11ab"));
System.out.println(new StringInflater().inflate("4(ab)"));
System.out.println(new StringInflater().inflate("2(3b3(ab))"));
}
public static class StringInflater {
// This store the position of the last parsed token
private int posLastParsedToken = 0;
public String inflate(String expression) {
return parse(tokenize(expression), 0, false);
}
/**
* Language tokens:
* <ul>
* <li>literals:
* <ul>
* <li>intLiteral = [0-9]*</li>
* <li>charLiteral = [ab]</li>
* </ul>
* </li>
* <li>separators:
* <ul>
* <li>leftParen = '('</li>
* <li>rightParen = ')'</li>
* </ul>
* </li>
* </ul>
*/
private Object[] tokenize(String expression) {
List<Object> tokens = new ArrayList<Object>();
int i = 0;
while (i < expression.length()) {
if ('0' <= expression.charAt(i) && expression.charAt(i) <= '9') {
String number = "";
while ('0' <= expression.charAt(i) && expression.charAt(i) <= '9' && i < expression.length()) {
number += expression.charAt(i++);
}
tokens.add(Integer.valueOf(number));
} else {
tokens.add(expression.charAt(i++));
}
}
return tokens.toArray(new Object[tokens.size()]);
}
/**
* Language syntax:
* <ul>
* <li>simpleExpr = [intLiteral] charLiteral | [intLiteral] leftParen charLiteral+ rightParen</li>
* <li>compositeExpr = [intLiteral] leftParen (simpleExpr | compositeExpr)+ rightParen</li>
* <li>expression = (simpleExpr | compositeExpr)+</li>
* </ul>
*/
private String parse(Object[] tokens, int pos, boolean nested) {
posLastParsedToken = pos;
String result = "";
if (tokens[pos] instanceof Integer) {
/** it's a intLiteral */
// get quantifier value
int repetition = (int) tokens[pos];
// lookahead for (
if (tokens[pos + 1].equals("(")) {
// composite repetition, it could be:
// simpleExpr: "[intLiteral] leftParen charLiteral+ rightParen"
// compositeExpr: "[intLiteral] leftParen (simpleExpr | compositeExpr)+ rightParen"
result = parse(tokens, pos + 1, true);
} else {
// simple repetition, it could be:
// simpleExpr: [intLiteral] charLiteral
result = parse(tokens, pos + 1, false);
}
result = repeat(result, repetition);
// evaluate the rest of the expression because syntax allows it
if (posLastParsedToken + 1 == tokens.length) {
// end of the expression
return result;
} else {
// there are other simpleExpr or compositeExpr to parse
return result + parse(tokens, posLastParsedToken + 1, false);
}
} else if (tokens[pos].equals('(')) {
/** it's a leftParen */
// an open paren means what follow this token is considered nested (useful for string to treat as char sequence)
return parse(tokens, pos + 1, true);
} else if (tokens[pos].equals(')')) {
/** it's a rightParen */
// a closed paren, nothing to render
return "";
} else {
/** it's a charLiteral */
if (nested) {
// it's nested between paren, so more parsing is requested to consume next charLiteral or next simpleExpr or compositeExpr
return tokens[pos] + parse(tokens, pos + 1, nested);
} else {
// it's not nested between paren, return charLiteral as is
return "" + tokens[pos];
}
}
}
private String repeat(String s, int repetition) {
StringBuilder result = new StringBuilder();
for (int i = 0; i < repetition; i++) {
result.append(s);
}
return result.toString();
}
}
}
I want a regular expression that will extract _A_, 12345, Non_Literal_Left, and Non_Literal_Right from the following string:
... ${_A_}, ${12345}, '${Literal}' $yada yada'$[]' '${Non_Literal_Left} ${Non_Literal_Right}'
The closest I have been able to get is everything less the single quote restriction for literals:
Matcher matcher = Pattern.compile("\\$\\{(\\w+)\\}").matcher(s);
while (matcher.find()) {
result.add(matcher.group(1));
}
Which results in everything I want plus Literal, which I do not want to match.
Thanks in advance...
You could simply use a negative lookbehind:
"(?<!')\\$\\{(\\w+)\\}"
This will now only match if the $ is not preceded by '.
As Matt Ball mentioned in a comment, it might make sense to add another negative lookahead to the end, too:
"(?<!')\\$\\{(\\w+)\\}(?!')"
However, this will only matter if you have invalid/unmatched usage of ' as in ${Literal}' (in this case my first regex will still match Literal, the latter won't).
That was a joy.
Something tells me a RegEx expression would have been a little cleaner.
/**
* Utility class for parsing record field parameters in properties.
*
* #author Ryan
*
*/
public static class PropertyParser {
/**
* Stores the results of parsing a property.
*
* #author Ryan
*
*/
public static class ParsedParameters {
private final Set<String> literals;
private final Set<String> parameters;
private ParsedParameters() {
this.parameters = new HashSet<String>();
this.literals = new HashSet<String>();
}
/**
* Adds a literal property value to this object.
*
* #param string The literal property value to add to this object.
*/
private void addLiteral(String string) {
this.literals.add(string);
}
/**
* Adds a parameter name to this object.
*
* #param string The parameter name to add to this object.
*/
private void addParameter(String string) {
this.parameters.add(string);
}
/**
* Returns the set of literals stored in this object.
*
* #return The set of literals stored in this object.
*/
public Set<String> getLiterals() {
return this.literals;
}
/**
* Returns the set of parameters stored in this object.
*
* #return The set of parameters stored in this object.
*/
public Set<String> getParameters() {
return this.parameters;
}
}
private static final String BAD_FIELD_CHAR =
"Illegal character detected for field parameter: %c";
/**
* Extracts placeholder field name parameters from the input string.
* <p>
* Single quotes can be used to avoid the parser interpreting the ${...}
* as a field parameter.
* <p>
* For example, the parser would not detect any field parameters in the following string:
* <p>
* #!/bin/bash<br>
* # Echos the first argument<br>
* echo '${1}'<br>
* <p>
* The {#link #PropertySubstitutor()} is responsible for removing the single quotes
* surrounding the parameter when substituting the actual property value(s).
* <p>
* <b>Nested Parameters</b>
* <p>
* This parser itself will only parse the inner-most parameter or literal.
* <p>
* For example, ${Some${Value}} would actually be treated as a legal string, with
* 'Value' as the only field parameter extracted. During runtime substitution,
* this would result in ${Somebody} if the record value for the field "Value" was "body".
* <p>
* Theoretically, this parser could then be ran again to extract this generated parameter.
*
* #param string The property to parse for field parameters.
* #return An object containing the parsed parameters and literal values.
* #throws IllegalArgumentException If the property contains parameter syntax
* (i.e. ${text}) but contains illegal characters for the field.
* <p>
* Allowed characters for field names are alpha-numeric and underscores.
*/
public static ParsedParameters parseParametersAndLiterals(String string)
throws IllegalArgumentException {
if ((string == null) || string.isEmpty()) {
return new ParsedParameters();
}
ParsedParameters result = new ParsedParameters();
StringBuffer param = null;
Character badChar = null;
char c;
char p = '^';
boolean close = false;
boolean lQuote = false;
boolean open = false;
int l = string.length();
for (int i = 0; i < l; ++i) {
c = string.charAt(i);
if (!lQuote && (p == '\'') && (c == '$')) {
lQuote = true;
} else if ((p == '$') && (c == '{')) {
param = new StringBuffer();
open = true;
badChar = null;
} else if (open
&& (((c == '}') && (!lQuote || ((1 + i) == l))) || (lQuote && (p == '}')))) {
open = false;
close = true;
} else if (open) {
boolean validCharacter = Character.isLetterOrDigit(c) || (c == '_');
if (validCharacter || (lQuote && (c != '}'))) {
param.append(c);
}
if (!validCharacter && (c != '}')) {
badChar = c;
}
}
if (close) {
if ((badChar != null) && !(lQuote && (p == '}') && (c == '\''))) {
throw new IllegalArgumentException(String.format(BAD_FIELD_CHAR, badChar));
} else if (c != '\'') {
if (param.length() > 0) {
result.addParameter(param.toString());
}
} else {
result.addLiteral(param.toString());
}
lQuote = false;
close = false;
badChar = null;
}
p = c;
}
return result;
}
}
And tests, of course.
public class TestPropertyParser {
private Set<String> literals;
private Set<String> params;
private void assertLiteralsContains(String string) {
assertTrue(this.literals.contains(string));
}
private void assertParamsContains(String string) {
assertTrue(this.params.contains(string));
}
private void assertResultSizes(Integer paramSize, Integer literalSize) {
if (paramSize != null) {
assertNotNull(this.params);
assertEquals((int) paramSize, this.params.size());
} else {
assertNull(this.params);
}
if (literalSize != null) {
assertNotNull(this.literals);
assertEquals((int) literalSize, this.literals.size());
} else {
assertNull(this.literals);
}
}
private void parseAndSet(String stringToParse) {
ParsedParameters result = PropertyParser.parseParametersAndLiterals(stringToParse);
this.literals = result.getLiterals();
this.params = result.getParameters();
}
#Before
public void setup() {
this.params = new HashSet<String>();
this.literals = new HashSet<String>();
}
#Test(expected = IllegalArgumentException.class)
public void testParserInvalidParameterQuoteLeft() {
parseAndSet("'${Invalid Parameter}");
}
#Test(expected = IllegalArgumentException.class)
public void testParserInvalidParameterQuoteRight() {
parseAndSet("${Invalid Parameter}'");
}
#Test(expected = IllegalArgumentException.class)
public void testParserInvalidParameterSpaces() {
parseAndSet(" ${Invalid Parameter}");
}
#Test
public void testParserValidStrings() {
// Initialization condition.
assertResultSizes(0, 0);
// Null string.
parseAndSet(null);
assertResultSizes(0, 0);
// Empty string.
parseAndSet(new String());
assertResultSizes(0, 0);
// Single parameter.
parseAndSet("... ${_A_}, $yada yada'$[]' '${");
assertResultSizes(1, 0);
assertParamsContains("_A_");
// Many parameters and one literal.
parseAndSet("... ${_A_}, ${12345}, '${Literal}''${Non_Literal_Left} ${Non_Literal_Right}' ");
assertResultSizes(4, 1);
assertParamsContains("_A_");
assertParamsContains("12345");
assertParamsContains("Non_Literal_Left");
assertParamsContains("Non_Literal_Right");
assertLiteralsContains("Literal");
// Nested literal and odd bracket placements.
parseAndSet("''${Totally}''$}{$'${Single}");
assertResultSizes(1, 1);
assertParamsContains("Single");
assertLiteralsContains("Totally");
// Subset of ASCII characters.
parseAndSet("`1234567890-=qwertyuiop[]\\asdfghjkl;'zxcvbnm,./!##$%^&*()_+QWERTYUIOP{}|ASDFGHJKL:\"ZXCVBNM<>?");
assertResultSizes(0, 0);
// Illegal characters in literal.
parseAndSet("'${This literal is completely valid}'");
assertResultSizes(0, 1);
assertLiteralsContains("This literal is completely valid");
// Test incomplete literal, no closure.
parseAndSet("'${This literal is never closed");
assertResultSizes(0, 0);
// Test incomplete parameter from left.
parseAndSet("${Never_Closed");
assertResultSizes(0, 0);
// And again... with a parameter at the end.
parseAndSet("${Never_Closed${But_This_Is}");
assertResultSizes(1, 0);
assertParamsContains("But_This_Is");
// Empty parameter.
parseAndSet("${}");
assertResultSizes(0, 0);
// Restarting a new parameter within an already open parameter.
parseAndSet("${Perfectly valid${a}");
assertResultSizes(1, 0);
assertParamsContains("a");
// Variation of the above with quotes.
parseAndSet("'${Perfectly valid'${a}");
assertResultSizes(1, 0);
assertParamsContains("a");
// Variation of the above with quotes.
parseAndSet("${Perfectly valid'${a}");
assertResultSizes(1, 0);
assertParamsContains("a");
// Variation of the above with quotes.
parseAndSet("${Perfectly valid${a}'");
assertResultSizes(1, 0);
assertParamsContains("a");
// Variation of the above with quotes.
parseAndSet("${Perfectly valid'${a}'");
assertResultSizes(0, 1);
assertLiteralsContains("a");
// Variation of the above with spaces.
parseAndSet(" ${ Perfectly valid${a} ");
assertResultSizes(1, 0);
assertParamsContains("a");
// TODO Determine what the desired behavior is for nested literals and parameters.
// Test nested parameter in literal.
parseAndSet("'${Nested ${Parameter}}'");
assertResultSizes(1, 0);
assertParamsContains("Parameter");
// Nested parameter.
parseAndSet("${Nested_${Parameter}}'");
assertResultSizes(1, 0);
assertParamsContains("Parameter");
// Literal nested in a parameter.
parseAndSet(" ${Nested'${Literal}'}");
assertResultSizes(0, 1);
assertLiteralsContains("Literal");
}
}
I would like an efficient utility to generate unique sequences of bytes. UUID is a good candidate but UUID.randomUUID().toString() generates stuff like 44e128a5-ac7a-4c9a-be4c-224b6bf81b20 which is good, but I would prefer dash-less string.
I'm looking for an efficient way to generate a random strings, only from alphanumeric characters (no dashes or any other special symbols).
This does it:
public static void main(String[] args) {
final String uuid = UUID.randomUUID().toString().replace("-", "");
System.out.println("uuid = " + uuid);
}
Dashes don't need to be removed from HTTP request as you can see in URL of this thread.
But if you want to prepare well-formed URL without dependency on data you should use URLEncoder.encode( String data, String encoding ) instead of changing standard form of you data.
For UUID string representation dashes is normal.
I used JUG (Java UUID Generator) to generate unique ID.
It is unique across JVMs. Pretty good to use. Here is the code for your reference:
private static final SecureRandom secureRandom = new SecureRandom();
private static final UUIDGenerator generator = UUIDGenerator.getInstance();
public synchronized static String generateUniqueId() {
UUID uuid = generator.generateRandomBasedUUID(secureRandom);
return uuid.toString().replaceAll("-", "").toUpperCase();
}
You could download the library from: https://github.com/cowtowncoder/java-uuid-generator
Ended up writing something of my own based on UUID.java implementation. Note that I'm not generating a UUID, instead just a random 32 bytes hex string in the most efficient way I could think of.
Implementation
import java.security.SecureRandom;
import java.util.UUID;
public class RandomUtil {
// Maxim: Copied from UUID implementation :)
private static volatile SecureRandom numberGenerator = null;
private static final long MSB = 0x8000000000000000L;
public static String unique() {
SecureRandom ng = numberGenerator;
if (ng == null) {
numberGenerator = ng = new SecureRandom();
}
return Long.toHexString(MSB | ng.nextLong()) + Long.toHexString(MSB | ng.nextLong());
}
}
Usage
RandomUtil.unique()
Tests
Some of the inputs I've tested to make sure it's working:
public static void main(String[] args) {
System.out.println(UUID.randomUUID().toString());
System.out.println(RandomUtil.unique());
System.out.println();
System.out.println(Long.toHexString(0x8000000000000000L |21));
System.out.println(Long.toBinaryString(0x8000000000000000L |21));
System.out.println(Long.toHexString(Long.MAX_VALUE + 1));
}
A simple solution is
UUID.randomUUID().toString().replace("-", "")
(Like the existing solutions, only that it avoids the String#replaceAll call. Regular expression replacement is not required here, so String#replace feels more natural, though technically it still is implemented with regular expressions. Given that the generation of the UUID is more costly than the replacement, there should not be a significant difference in runtime.)
Using the UUID class is probably fast enough for most scenarios, though I would expect that some specialized hand-written variant, which does not need the postprocessing, to be faster. Anyway, the bottleneck of the overall computation will normally be the random number generator. In case of the UUID class, it uses SecureRandom.
Which random number generator to use is also a trade-off that depends on the application. If it is security-sensitive, SecureRandom is, in general, the recommendation. Otherwise, ThreadLocalRandom is an alternative (faster than SecureRandom or the old Random, but not cryptographically secure).
I am amazed to see so many string replace ideas of UUID. How about this:
UUID temp = UUID.randomUUID();
String uuidString = Long.toHexString(temp.getMostSignificantBits())
+ Long.toHexString(temp.getLeastSignificantBits());
This is the fasted way of doing it since the whole toString() of UUID is already more expensive not to mention the regular expression which has to be parsed and executed or the replacing with empty string.
I have just copied UUID toString() method and just updated it to remove "-" from it. It will be much more faster and straight forward than any other solution
public String generateUUIDString(UUID uuid) {
return (digits(uuid.getMostSignificantBits() >> 32, 8) +
digits(uuid.getMostSignificantBits() >> 16, 4) +
digits(uuid.getMostSignificantBits(), 4) +
digits(uuid.getLeastSignificantBits() >> 48, 4) +
digits(uuid.getLeastSignificantBits(), 12));
}
/** Returns val represented by the specified number of hex digits. */
private String digits(long val, int digits) {
long hi = 1L << (digits * 4);
return Long.toHexString(hi | (val & (hi - 1))).substring(1);
}
Usage:
generateUUIDString(UUID.randomUUID())
Another implementation using reflection
public String generateString(UUID uuid) throws NoSuchMethodException, InvocationTargetException, IllegalAccessException {
if (uuid == null) {
return "";
}
Method digits = UUID.class.getDeclaredMethod("digits", long.class, int.class);
digits.setAccessible(true);
return ( (String) digits.invoke(uuid, uuid.getMostSignificantBits() >> 32, 8) +
digits.invoke(uuid, uuid.getMostSignificantBits() >> 16, 4) +
digits.invoke(uuid, uuid.getMostSignificantBits(), 4) +
digits.invoke(uuid, uuid.getLeastSignificantBits() >> 48, 4) +
digits.invoke(uuid, uuid.getLeastSignificantBits(), 12));
}
I use org.apache.commons.codec.binary.Base64 to convert a UUID into a url-safe unique string that is 22 characters in length and has the same uniqueness as UUID.
I posted my code on Storing UUID as base64 String
Well, since an UUID gets added the hyphens (dashes) on toString() we can steal the implementation from Java's own implementation, shorting the byte array to 32 and adjusting the offset.
public static String special() {
UUID uuid = UUID.randomUUID();
return fastUUID(uuid.getLeastSignificantBits(), uuid.getMostSignificantBits());
}
private static String fastUUID(long lsb, long msb) {
byte[] buf = new byte[32];
formatUnsignedLong0(lsb, 4, buf, 20, 12);
formatUnsignedLong0(lsb >>> 48, 4, buf, 16, 4);
formatUnsignedLong0(msb, 4, buf, 12, 4);
formatUnsignedLong0(msb >>> 16, 4, buf, 8, 4);
formatUnsignedLong0(msb >>> 32, 4, buf, 0, 8);
return new String(buf, 0);
}
private static final char[] digits = {
'0' , '1' , '2' , '3' , '4' , '5' ,
'6' , '7' , '8' , '9' , 'a' , 'b' ,
'c' , 'd' , 'e' , 'f' , 'g' , 'h' ,
'i' , 'j' , 'k' , 'l' , 'm' , 'n' ,
'o' , 'p' , 'q' , 'r' , 's' , 't' ,
'u' , 'v' , 'w' , 'x' , 'y' , 'z'
};
private static void formatUnsignedLong0(long val, int shift, byte[] buf, int offset, int len) {
int charPos = offset + len;
int radix = 1 << shift;
int mask = radix - 1;
do {
buf[--charPos] = (byte)digits[((int) val) & mask];
val >>>= shift;
} while (charPos > offset);
}
Running it:
public static void main(String[] args) {
IntStream.range(0, 100).forEach(i-> {
System.out.println(special());
});
}
Generates:
23f57da8a2784bb5acca553030f82e61
a14427efd8d147fdae315c1cf20fc53c
ee972aa1cf85414ca364bef5c74a7e57
6877ef35eab94b9485c5dd7c8c5a8a56
578721476629422381c0f625e22564a8
dbf60f068b5443d7bc6e5280696fed9f
dd611e870700480d81d394dd2125316c
04d71b9379ef4db49c28e113485ea76d
fd4e8cf3f85a45ae8c1b9bfe3e489a4a
858c4e8297f849b784b65b6096dec4d4
b30a8ca318a349b486b5693814422555
351c2fab9bc1426fa3bb512484628f12
9ce59e01db38405aab82d46f2a236880
5ffb5acb547a4f15a4621b406391bc0d
541b5fba8ddb4f1ebbd59cdcd5f59f7c
77f9460c4baa43a7bbaaf7f2aff205bd
85fa5254305b4c72b1b7c0103aaee269
062d45aa86694b06aad841236b839341
7a265293560f4223ab8248fda502c89b
b748c13ac45747b99aead4b0a2d7d179
cbcbf623c75d407fa3c88cfc89a90ed4
da263eed8771496faebb6290527f77fa
22231088dec04cffa40fb79ff56c6453
594a66de4b874b3491649c5d033917f6
4f6802ebd0cc4a39b25a67191c3af09d
8878b7ab8aa445cdadbef0f7c70d3deb
2c3ed0154f0c4ddbad498b7ae928b9bc
cac1dcaa80e54e2db3248987d2dbda4a
f9a3567e6dd54bf5900444c8b1c03815
f0d25d7b615a4495b51c01ab15093a88
243e45926311437c8b26cede2dc7de25
e4738c50e4cd448fbac252571c0907df
261d3593cc054569bcd645885d22c2ee
64a4796356a04cc4a09506aeb6f5b8fd
9aeebcbdde074ed69738589ca9bde0f1
ec040c956861466b84ed7f7cec601be0
18bd233781e44e7cb152800db4c4edc7
1b7b251df1244e8db46a45c186aada2b
3e32f644c9074cb3bbb15c5be1d9b95e
625309e3ffd14a90bfbd6d48142ac60e
664f0cf347ce4767add576da584526e7
fe3893fd376849fe9fed00e328e61470
254ce1441bbf4a7eae1cdf8d288e61e8
90896c6b309a49f48cc3b7a1570e1846
5f47acd1319245648098c1aec9b95f23
f798033052614b9eae8da7eba4ba3475
3471c4320e4e431eb1fa9f5eb5cb21e0
855f473fed034b1fa17f4f65b850e03b
1245de826d0d4373bdd4cf2157792954
543a8b16efca4fa2b5263315e8b21660
2dc186d699274257922853d783c0ec13
a92e6b1783db4b49a4aaa424b9e1b03f
16773feb48054cf0942a2a27204b3572
1e58da2107ac4ee39e28a93b32e1df1d
67622c19498d4178a1bab6b19087f2c2
412b6b4474fc43ccbeb1e7707b6420ee
7d0fd632913c425eb5f087600ccea870
439687baddb44852a43048b04d38427c
8b2dcc4e50464429a18b11e4aacf51a2
2fccb1c832894fe6b0b61bbdf175cd39
6d224b3d6e8747319fcf01b0309d8a0f
b4982e3b4b594cb4b334c95c2c96355e
c47fbaf90d1d4e9caf211f93b742631f
9440271e8ba6447d9a008e89a93016a6
8d24274b6a3f436a88362438aa6a221d
60452bd3f71747ed8c3706abb2235bd5
6fa93f2ee30740b89496439dd7227a4f
cc17504cf80641f882c8665ae166ba44
743efe8eac6e47a789928da4fb5b6f70
4c4d2df3461448c4a3e934cf4a7ea74f
b231eb3fb46240d38157764e8906aa7b
a234ae65f7ed48f6b1887644eed36cc2
c9cd5ed3df3f4a27957b45498f0c48ef
3eb2fbbb0a714bb7986aef3ee34f0254
d15968e605f0440c9e740e3f4e498a9f
63a8d50e8db24b91a13d4ac2fb6f7d5b
5377df9296154c57926672ca8b3c9478
a4db4a3a9d5148648a23aa7f4f77f1e0
d0aee355a2ba42de89d659385514b0fa
e92e7702481a4575a66d59c061459c5b
1b6c542d8f994d85a1312ab2cf4545ce
88e347a515474ec59013673e5402b97f
2187d9b2dc2b4d96baceade5ae99db44
4d641e69ca5b4acf90f8afe238d8a940
9c0f4c101c434831b928114c5fc0c401
140e16f6cf134785a98ae9baee5b9e7b
4dac5910f4d047e1b213c058e2230bf3
fb50a7e6333f49e4b469234426d5002f
c96c5f2fa167458eaa6d01997d90a980
1e79721e587c4a92aa55cdf8195c8c55
0da27fc5d8384ce299197b4e06cda1d4
a5e32d9cf5834e86b3fe02bc0e3104d6
2dc1826647594b1fb728de67d3df363c
0276371815254198bd22cc76f901b332
bf9d77b7b4a64e7a97ade2a62af1f8e0
268cce3249f64895b6b47e86cf296e5b
d523201fc950435f803bf89d5f042c45
607a4306b90b467f8b19c2c943bc92ef
adfa9fb63a874ca1ad746ff573f03f28
fe88132c70d141e8839ce9e7f0308750
Should be just as efficient (more effecient) than Javas actually.
This utility class generates UUIDs as String with or without dashes.
package your.package.name;
import java.security.SecureRandom;
import java.util.Random;
/**
* Utility class that creates random-based UUIDs.
*
*/
public abstract class RandomUuidStringCreator {
private static final int RANDOM_VERSION = 4;
/**
* Returns a random-based UUID as String.
*
* It uses a thread local {#link SecureRandom}.
*
* #return a random-based UUID string
*/
public static String getRandomUuid() {
return getRandomUuid(SecureRandomLazyHolder.SECURE_RANDOM);
}
/**
* Returns a random-based UUID as String WITH dashes.
*
* It uses a thread local {#link SecureRandom}.
*
* #return a random-based UUID string
*/
public static String getRandomUuidWithDashes() {
return format(getRandomUuid());
}
/**
* Returns a random-based UUID String.
*
* It uses any instance of {#link Random}.
*
* #return a random-based UUID string
*/
public static String getRandomUuid(Random random) {
long msb = 0;
long lsb = 0;
// (3) set all bit randomly
if (random instanceof SecureRandom) {
// Faster for instances of SecureRandom
final byte[] bytes = new byte[16];
random.nextBytes(bytes);
msb = toNumber(bytes, 0, 8); // first 8 bytes for MSB
lsb = toNumber(bytes, 8, 16); // last 8 bytes for LSB
} else {
msb = random.nextLong(); // first 8 bytes for MSB
lsb = random.nextLong(); // last 8 bytes for LSB
}
// Apply version and variant bits (required for RFC-4122 compliance)
msb = (msb & 0xffffffffffff0fffL) | (RANDOM_VERSION & 0x0f) << 12; // apply version bits
lsb = (lsb & 0x3fffffffffffffffL) | 0x8000000000000000L; // apply variant bits
// Convert MSB and LSB to hexadecimal
String msbHex = zerofill(Long.toHexString(msb), 16);
String lsbHex = zerofill(Long.toHexString(lsb), 16);
// Return the UUID
return msbHex + lsbHex;
}
/**
* Returns a random-based UUID as String WITH dashes.
*
* It uses a thread local {#link SecureRandom}.
*
* #return a random-based UUID string
*/
public static String getRandomUuidWithDashes(Random random) {
return format(getRandomUuid(random));
}
private static long toNumber(final byte[] bytes, final int start, final int length) {
long result = 0;
for (int i = start; i < length; i++) {
result = (result << 8) | (bytes[i] & 0xff);
}
return result;
}
private static String zerofill(String string, int length) {
return new String(lpad(string.toCharArray(), length, '0'));
}
private static char[] lpad(char[] chars, int length, char fill) {
int delta = 0;
int limit = 0;
if (length > chars.length) {
delta = length - chars.length;
limit = length;
} else {
delta = 0;
limit = chars.length;
}
char[] output = new char[chars.length + delta];
for (int i = 0; i < limit; i++) {
if (i < delta) {
output[i] = fill;
} else {
output[i] = chars[i - delta];
}
}
return output;
}
private static String format(String string) {
char[] input = string.toCharArray();
char[] output = new char[36];
System.arraycopy(input, 0, output, 0, 8);
System.arraycopy(input, 8, output, 9, 4);
System.arraycopy(input, 12, output, 14, 4);
System.arraycopy(input, 16, output, 19, 4);
System.arraycopy(input, 20, output, 24, 12);
output[8] = '-';
output[13] = '-';
output[18] = '-';
output[23] = '-';
return new String(output);
}
// Holds lazy secure random
private static class SecureRandomLazyHolder {
static final Random SECURE_RANDOM = new SecureRandom();
}
/**
* For tests!
*/
public static void main(String[] args) {
System.out.println("// Using `java.security.SecureRandom` (DEFAULT)");
System.out.println("RandomUuidCreator.getRandomUuid()");
System.out.println();
for (int i = 0; i < 5; i++) {
System.out.println(RandomUuidStringCreator.getRandomUuid());
}
System.out.println();
System.out.println("// Using `java.util.Random` (FASTER)");
System.out.println("RandomUuidCreator.getRandomUuid(new Random())");
System.out.println();
Random random = new Random();
for (int i = 0; i < 5; i++) {
System.out.println(RandomUuidStringCreator.getRandomUuid(random));
}
}
}
This is the output:
// Using `java.security.SecureRandom` (DEFAULT)
RandomUuidStringCreator.getRandomUuid()
'f553ca75657b4b5d85bedf1082785a0b'
'525ecc389e934f209b97d0f0db09d9c6'
'93ec6425bb04499ab47b790fd013ab0d'
'c2d438c620ea4cd5baafd448f9fe945b'
'fb4bc5734931415e94e78da62cb5fe0d'
// Using `java.util.Random` (FASTER)
RandomUuidStringCreator.getRandomUuid(new Random())
'051360b5c92d40fbbb89b40842adbacc'
'a993896538aa43faacbcfd83f913f38b'
'720684d22c584d5299cb03cdbc1912d2'
'82cf94ea296a4a138a92825a0068d4a1'
'a7eda46a215c4e55be3aa957ba74ca9c'
There's a codec in uuid-creator that can do it more efficiently: Base16Codec. Example:
// Returns a base-16 string
// It is much faster than doing `uuid.toString().replaceAll("-", "")`.
UuidCodec<String> codec = new Base16Codec();
String string = codec.encode(UUID.randomUUID());