Pagination in Getting the File - java

I have a location where 3000 files is stored. But i want to get the list of 1000 files at a time and in next call another 1000 files and so on.
Please find my below code :
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Comparator;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
public class FileSystem {
public static void main(String args[]) throws Exception {
FileSystem.createListFile();
FileSystem.getFileInBatch();
}
private static void getFileInBatch() {
int MAX_INDEX= 1000;
try (Stream<Path> walk = Files.walk(Paths.get("C://FileTest"))) {
List<String> result = walk.filter(p -> Files.isRegularFile(p) && p.getFileName().toString().endsWith(".txt"))
.sorted(Comparator.comparingInt(FileSystem::pathToInt))
.map(x -> x.toString()).limit(MAX_INDEX).collect(Collectors.toList());
result.forEach(System.out::println);
System.out.println(result.size());
} catch (IOException e) {
e.printStackTrace();
}
}
private static int pathToInt(final Path path) {
return Integer.parseInt(path.getFileName()
.toString()
.replaceAll("Aamir(\\d+).txt", "$1")
);
}
private static void createListFile() throws IOException {
for (int i = 0; i < 3000; i++) {
File file = new File("C://FileTest/Aamir" + i + ".txt");
if (file.createNewFile()) {
System.out.println(file.getName() + " is created!");
}
}
}
}
I am able to get the first 1000 (Aamir0.txt to Aamir999.txt) files using the limit in streams.
Now how can i get the next 1000 files ( Aamir1000.txt to Aamir1999.txt)

You can use skip in your Stream. For example:
int toSkip = 1000; // define as method param/etc.
List<String> result = walk.filter(p -> Files.isRegularFile(p) && p.getFileName().toString().endsWith(".txt"))
.sorted(Comparator.comparingInt(FileSystem::pathToInt))
.map(x -> x.toString()).skip(toSkip).limit(MAX_INDEX).collect(Collectors.toList());

Related

Scraping the main content only of a web page by JSoup and save to file

I want to scrape the main content only of a web page by JSoup and save to a file.
Test web page:
https://netbasal.com/using-the-angular-http-client-in-angular-v15-f4bec3c11926
My test program use java 11, JSoup 1.15.3.
The output is missing:
the title "Using the Angular Http Client in Angular v15",
the source codes (by Github),
the heading "Interceptors in Lazy Loaded Modules".
How to modify the test program to add the missing content?
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
public class App
{
public static void main( String[] args )
{
try {
String dir = "/home/ak/using-the-http-client-v15";
String filename = "Out";
Path path = Paths.get(dir + "/" + filename + ".txt");
Document doc = Jsoup.connect("https://netbasal.com/using-the-angular-http-client-in-angular-v15-f4bec3c11926").get();
String searchStrings[] = {
"pw-post-body-paragraph"
};
Set<String> tagNameSet = Set.of("span", "p");
List<String> output = new ArrayList<String>();
for (String searchString : searchStrings) {
Elements searchStringElements = doc.getElementsByClass(searchString);
if (searchStringElements == null)
continue;
if (searchStringElements.hasClass(searchString)) {
for (Element e : searchStringElements) {
if (tagNameSet.contains(e.tagName())) {
String str = e.wholeText() + "\n\n";
output.add(str);
}
}
}
int size = output.size();
for (int i = 0; i < size; i++) {
// Java 11 Append mode
Files.writeString(path, output.get(i),
StandardOpenOption.CREATE, StandardOpenOption.APPEND);
}
}
} catch (IOException e) {
e.printStackTrace();
}
}
}

How to get file name having particular content in it using java?

Here I am trying to read a folder containing .sql files and I am getting those files in an array, now my requirement is to read every file and find particular word like as join if join is present in the file return filename or else discard , someone can pls help me with this ..
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.stream.Stream;
public class Filter {
public static List<String> textFiles(String directory) {
List<String> textFiles = new ArrayList<String>();
File dir = new File(directory);
for (File file : dir.listFiles()) {
if (file.getName().endsWith((".sql"))) {
textFiles.add(file.getName());
}
}
return textFiles;
}
public static void getfilename(String directory) throws IOException {
List<String> textFiles = textFiles(directory);
for (String string : textFiles) {
Path path = Paths.get(string);
try (Stream<String> streamOfLines = Files.lines(path)) {
Optional<String> line = streamOfLines.filter(l -> l.contains("join")).findFirst();
if (line.isPresent()) {
System.out.println(path.getFileName());
} else
System.out.println("Not found");
} catch (Exception e) {
}
}
}
public static void main(String[] args) throws IOException {
getfilename("/home/niteshb/wave1-master/wave1/sql/scripts");
}
}
You can search word in file as belwo, pass the path of file
try(Stream <String> streamOfLines = Files.lines(path)) {
Optional <String> line = streamOfLines.filter(l ->
l.contains(searchTerm))
.findFirst();
if(line.isPresent()){
System.out.println(line.get()); // you can add return true or false
}else
System.out.println("Not found");
}catch(Exception e) {}
}
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.stream.Stream;
public class Filter {
public static List<String> textFiles(String directory) {
List<String> textFiles = new ArrayList<String>();
File dir = new File(directory);
for (File file : dir.listFiles()) {
if (file.getName().endsWith((".sql"))) {
textFiles.add(file.getAbsolutePath());
}
}
System.out.println(textFiles.size());
return textFiles;
}
public static String getfilename(String directory) throws IOException {
List<String> textFiles = textFiles(directory);
for (String string : textFiles) {
Path path = Paths.get(string);
try (Stream<String> streamOfLines = Files.lines(path)) {
Optional<String> line = streamOfLines.filter(l -> l.contains("join")).findFirst();
if (line.isPresent()) {
System.out.println(path.getFileName());
} else
System.out.println("");
} catch (Exception e) {
}
}
return directory;
}
public static void main(String[] args) throws IOException {
getfilename("/home/wave1-master/wave1/sql/");
}
}

Need to count the number of files located within each zip file in a directory folder in JAVA

I have a folder which has a series of Zip files within it. I am trying to iterate through the folder and count the number of files that are in each zip file. I have created two pieces of code, I am just not sure how to put them together to get my desired results. Both codes are placed into try/catch blocks and they both work perfectly independently. This is using Eclipse, written in Java.
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.stream.Collectors;
import java.util.zip.ZipFile;
import java.io.File;
import java.util.List;
public class KZF {
public static void main(String[] args) {
// TODO Auto-generated method stub
// Try/Catch Block counts the number of files within a given zip file
try {
ZipFile zipFile = new ZipFile(
"C:\\Users\\username\\Documents\\Temp\\AllKo\\Policy.zip");
int NumberOfFiles = zipFile.size() - 1;
// String name = zipFile.getName();
Path path = Paths
.get("C:\\Users\\username\\Documents\\Temp\\AllKo\\Policy.zip");
Path filename = path.getFileName();
System.out.print("The number of files in: ");
// System.out.print(name);
System.out.print(filename.toString());
System.out.print(" are: ");
System.out.print(NumberOfFiles + " file(s)");
zipFile.close();
}
catch (IOException ioe) {
System.out.println("Error opening zip file" + ioe);
}
// ----------------------------------------------------------------------------------------------------------
// Creates list of every file specified folder
String dirLocation = "C:\\Users\\username\\Documents\\Temp\\AllKo";
try { List<File> files = Files.list(Paths.get(dirLocation))
.map(Path::toFile) .collect(Collectors.toList());
files.forEach(System.out::println);
} catch(IOException e) { Error }
}
}
You must be careful about opening/closing streams, so you can try something like this:
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Enumeration;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
public class KZF
{
static int findNumberOfFiles(File file) {
try (ZipFile zipFile = new ZipFile(file)) {
return zipFile.stream().filter(z -> !z.isDirectory()).count();
} catch (Exception e) {
return -1;
}
}
static String createInfo(File file) {
int tot = findNumberOfFiles(file);
return (file.getName() + ": " + (tot >= 0 ? tot + " files" : "Error reading zip file"));
}
public static void main(String[] args) throws IOException {
String dirLocation = "C:\\Users\\username\\Documents\\Temp\\AllKo";
try (Stream<Path> files = Files.list(Paths.get(dirLocation))) {
files
.filter(path -> path.toFile().isFile())
.filter(path -> path.toString().toLowerCase().endsWith(".zip"))
.map(Path::toFile)
.map(KZF::createInfo)
.forEach(System.out::println);
}
}
}

How to group files based on filenames from multiple paths in Java

I would like to group specific files based on their file names from multiple paths. I have followed this stackoverflow link. I have not been able to loop through each file after I start streaming the path to find that specific file name.
Here are the paths with files contents:
/var/tmp/data_sample1/data2_first_example.set.csv
/var/tmp/data_sample1/data3_first_example.set.csv
/var/tmp/data_sample1/data1_first_example.set.csv
/var/tmp/data_sample2/data2_second_example.set.csv
/var/tmp/data_sample2/data1_second_example.set.csv
/var/tmp/data_sample2/data3_second_example.set.csv
/tmp/csv_files/data_sample3/data2_third_example.set.csv
/tmp/csv_files/data_sample3/data1_third_example.set.csv
/tmp/csv_files/data_sample3/data3_third_example.set.csv
Enum Class:
enum PersonType {
A,
B
}
FileName.java
import java.util.Arrays;
import java.util.List;
public class FileName {
private final String first = "_first_sample";
private final String second = "_second_sample";
private final String third = "_third_sample";
private final List<String> filenames = Arrays.asList(first, second, third);
public List<String> getFilenames() {
return filenames;
}
}
CSVFiles.java
import java.io.File;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.*;
import java.util.stream.Collectors;
public class CSVFiles {
private PersonType personType;
private List<String> fileNames = new ArrayList<>();
private List<File> firstSample = new ArrayList<>();
private List<File> secondSample = new ArrayList<>();
private List<File> thirdSample = new ArrayList<>();
public CSVFiles(PersonType personType, List<String> paths) {
if (personType == PersonType.A) {
this.personType = personType;
FileName fileName = new FileName();
this.fileNames = fileName.getFilenames();
setCSVFiles(paths);
}
}
public List<File> setCSVFiles(List<String> paths) {
List<Path> collect = paths.stream()
.flatMap(path -> {
try {
return Files.find(Paths.get(path), Integer.MAX_VALUE,
(p, attrs) -> attrs.isRegularFile()
&& p.toString().contains(".set")
&& p.toString().endsWith(".csv")
);
} catch (IOException ex) {
throw new UncheckedIOException(ex);
}
}).collect(Collectors.toList());
return collect.stream()
.map(Path::toFile)
.filter(file -> {
if (file.getName().contains("_first_sample")) {
firstSample.add(file);
return true;
} else if (file.getName().contains("_second_sample")) {
secondSample.add(file);
return true;
} else if (file.getName().contains("_third_sample")) {
thirdSample.add(file);
return true;
}
return false;
})
.collect(Collectors.toList());
}
}
CSVFilesTest.java
import org.junit.Test;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.*;
public class CSVFilesTest {
#Test
public void test() {
String data_sample1 = "/var/tmp/data_sample1";
String data_sample2 = "/var/tmp/data_sample2";
String data_sample3 = "/tmp/csv_files/data_sample3";
List<String> paths = Arrays.asList(data_sample1, data_sample2, data_sample3);
System.out.println(paths);
CSVFiles csvFiles = new CSVFiles(PersonType.A, paths);
}
}
Desired Output:
firstSample: [data1_first_example.set.csv, data2_first_example.set.csv, data3_first_example.set.csv]
secondSample: [data1_second_example.set.csv, data2_second_example.set.csv, data3_second_example.set.csv]
thirdSample: [data1_third_example.set.csv, data2_third_example.set.csv, data3_third_example.set.csv]
Any feedback is appreciated!
Solution thanks to "sync it" comments:
public Map<String, List<String>> setCSVFiles(List<String> paths) {
List<Path> collect = paths.stream()
.flatMap(path -> {
try {
return Files.find(Paths.get(path), Integer.MAX_VALUE,
(p, attrs) -> attrs.isRegularFile()
&& p.toString().contains(".set")
&& p.toString().endsWith(".csv")
);
} catch (IOException ex) {
throw new UncheckedIOException(ex);
}
}).collect(Collectors.toList());
return collect.stream()
.map(Path::toString)
.collect(Collectors.groupingBy(path ->
path.substring(path.lastIndexOf("/")+1)
));
}

Getting all columns from get result in HBase dynamically

I am working on a Get object as retrieved from a table in Habse. I want to dynamically retrieve all column values related to that get since I don't know the exact name of column families
val result1 = hTable.get(g)
if (!result1.isEmpty) {
//binaryEpisodes = result1.getValue(Bytes.toBytes("episodes"),Bytes.toBytes("episodes"))
//instead of above retrieve all values dynamically
}
Simple way :
get rawcells and knowing CF , columns information.
You have to do something like below example
public static void printResult(Result result, Logger logger) {
logger.info("Row: ");
for (Cell cell : result.rawCells()) {
byte[] family = CellUtil.cloneFamily(cell);
byte[] column = CellUtil.cloneQualifier(cell);
byte[] value = CellUtil.cloneValue(cell);
logger.info("\t" + Bytes.toString(family) + ":" + Bytes.toString(column) + " = " + Bytes.toString(value));
}
}
Hbase Admin way : Hbase client API was exposed by HbaseAdmin class like below...
Client would be like
package mytest;
import com.usertest.*;
import java.io.IOException;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
public class ListHbaseTablesAndColumns {
public static void main(String[] args) {
try {
HbaseMetaData hbaseMetaData =new HbaseMetaData();
for(String hbaseTable:hbaseMetaData .getTableNames(".*yourtables.*")){
for (String column : hbaseMetaData .getColumns(hbaseTable, 10000)) {
System.out.println(hbaseTable + "," + column);
}
}
} catch (IOException e) {
e.printStackTrace();
}
}
}
Use below class to Get HbaseMetaData..
package com.usertest;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.PageFilter;
import java.io.IOException;
import java.util.*;
import java.util.regex.Pattern;
public class HbaseMetaData {
private HBaseAdmin hBaseAdmin;
private Configuration hBaseConfiguration;
public HbaseMetaData () throws IOException {
this.hBaseConfiguration = HBaseConfiguration.create();
this.hBaseAdmin = new HBaseAdmin(hBaseConfiguration);
}
/** get all Table names **/
public List<String> getTableNames(String regex) throws IOException {
Pattern pattern=Pattern.compile(regex);
List<String> tableList = new ArrayList<String>();
TableName[] tableNames=hBaseAdmin.listTableNames();
for (TableName tableName:tableNames){
if(pattern.matcher(tableName.toString()).find()){
tableList.add(tableName.toString());
}
}
return tableList;
}
/** Get all columns **/
public Set<String> getColumns(String hbaseTable) throws IOException {
return getColumns(hbaseTable, 10000);
}
/** get all columns from the table **/
public Set<String> getColumns(String hbaseTable, int limitScan) throws IOException {
Set<String> columnList = new TreeSet<String>();
HTable hTable=new HTable(hBaseConfiguration, hbaseTable);
Scan scan=new Scan();
scan.setFilter(new PageFilter(limitScan));
ResultScanner results = hTable.getScanner(scan);
for(Result result:results){
for(KeyValue keyValue:result.list()){
columnList.add(
new String(keyValue.getFamily()) + ":" +
new String(keyValue.getQualifier())
);
}
}
return columnList;
}
}

Categories

Resources