I am using Spring Batch with Spring Cloud Task to automate reading data from a file and writing it to MongoDB. My use case currently has 2 steps (I will add one more after I get these 2 working). I am trying to use remote partitioning with Spring Cloud Task's DeployerPartitionHandler class so that, as I understand it, the master node launches workers directly instead of using ActiveMQ/RabbitMQ with Spring Integration as a broker. I have created 2 partitioners and 2 partition handler beans for my 2 steps. Sample code is below. I am getting the following exception:
2020-03-11 12:03:59 - o.s.batch.core.step.AbstractStep - Encountered an error executing step step1 in job Job669228617
java.lang.NullPointerException: null
at org.springframework.cloud.task.batch.partition.DeployerPartitionHandler.launchWorker(DeployerPartitionHandler.java:347)
at org.springframework.cloud.task.batch.partition.DeployerPartitionHandler.launchWorkers(DeployerPartitionHandler.java:313)
at org.springframework.cloud.task.batch.partition.DeployerPartitionHandler.handle(DeployerPartitionHandler.java:302)
at org.springframework.batch.core.partition.support.PartitionStep.doExecute(PartitionStep.java:106)
at org.springframework.batch.core.step.AbstractStep.execute(AbstractStep.java:208)
at org.springframework.batch.core.job.SimpleStepHandler.handleStep(SimpleStepHandler.java:148)
at org.springframework.batch.core.job.AbstractJob.handleStep(AbstractJob.java:410)
at org.springframework.batch.core.job.SimpleJob.doExecute(SimpleJob.java:136)
at org.springframework.batch.core.job.AbstractJob.execute(AbstractJob.java:319)
at org.springframework.batch.core.launch.support.SimpleJobLauncher$1.run(SimpleJobLauncher.java:147)
at org.springframework.core.task.SyncTaskExecutor.execute(SyncTaskExecutor.java:50)
at org.springframework.batch.core.launch.support.SimpleJobLauncher.run(SimpleJobLauncher.java:140)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source)
at java.lang.reflect.Method.invoke(Unknown Source)
at org.springframework.aop.support.AopUtils.invokeJoinpointUsingReflection(AopUtils.java:344)
at org.springframework.aop.framework.ReflectiveMethodInvocation.invokeJoinpoint(ReflectiveMethodInvocation.java:198)
at org.springframework.aop.framework.ReflectiveMethodInvocation.proceed(ReflectiveMethodInvocation.java:163)
at org.springframework.batch.core.configuration.annotation.SimpleBatchConfiguration$PassthruAdvice.invoke(SimpleBatchConfiguration.java:127)
at org.springframework.aop.framework.ReflectiveMethodInvocation.proceed(ReflectiveMethodInvocation.java:186)
at org.springframework.aop.framework.JdkDynamicAopProxy.invoke(JdkDynamicAopProxy.java:212)
at com.sun.proxy.$Proxy77.run(Unknown Source)
at org.springframework.boot.autoconfigure.batch.JobLauncherCommandLineRunner.execute(JobLauncherCommandLineRunner.java:192)
@Configuration
@ComponentScan(basePackageClasses = DbConfig.class)
public class JobConfig {

    @Autowired
    private TaskLauncher taskLauncher;
    @Autowired
    private JobExplorer jobExplorer;
    @Autowired
    private TaskRepository taskRepository;
    @Autowired
    private Reader1 reader1;
    @Autowired
    private Writer1 writer1;
    @Autowired
    private Reader2 reader2;
    @Autowired
    private Writer2 writer2;
    @Autowired
    private JobBuilderFactory jobBuilderFactory;
    @Autowired
    private StepBuilderFactory stepBuilderFactory;
    @Autowired
    private DelegatingResourceLoader resourceLoader;
    @Autowired
    private ConfigurableApplicationContext context;
    @Autowired
    public JobRepository jobRepository;
    @Autowired
    private Environment environment;

    private static final int GRID_SIZE = 2;

    @Autowired
    @Qualifier("partitionHandler1")
    private PartitionHandler partitionHandler1;

    @Autowired
    @Qualifier("partitionHandler2")
    private PartitionHandler partitionHandler2;

    @Bean
    @Profile("master")
    public Job masterJob() {
        Random random = new Random();
        return this.jobBuilderFactory.get("masterJob" + random.nextInt())
                .start(step1())
                .next(step2())
                .build();
    }

    @Bean
    @Profile("master")
    public Step step1() {
        return this.stepBuilderFactory.get("step1")
                .partitioner("slaveStep1", partitioner1())
                .partitionHandler(partitionHandler1)
                .taskExecutor(taskExecutor())
                .build();
    }

    @Bean
    @Profile("master")
    public Step step2() {
        return this.stepBuilderFactory.get("step2")
                .partitioner("slaveStep2", partitioner2())
                .partitionHandler(partitionHandler2)
                .taskExecutor(taskExecutor())
                .build();
    }

    // Not shown in the original post; a minimal stand-in so the config compiles.
    @Bean
    public TaskExecutor taskExecutor() {
        return new SimpleAsyncTaskExecutor();
    }

    @Bean
    @Profile("worker")
    public DeployerStepExecutionHandler stepExecutionHandler(JobExplorer jobExplorer) {
        return new DeployerStepExecutionHandler(this.context, jobExplorer, this.jobRepository);
    }

    @Bean
    public Step slaveStep1() {
        return this.stepBuilderFactory.get("slaveStep1")
                .<Domain1, Domain1>chunk(50)
                .reader(reader1)
                .writer(writer1)
                .listener(stepExecutionListner())
                .build();
    }

    @Bean
    public Step slaveStep2() {
        return this.stepBuilderFactory.get("slaveStep2")
                .<Domain2, Domain2>chunk(50)
                .reader(reader2)
                .writer(writer2)
                .listener(stepExecutionListner())
                .build();
    }

    @Bean
    public Partitioner partitioner1() {
        FilePartitioner filePartitioner = new FilePartitioner("classpath:input/test1*.csv");
        return filePartitioner.getFilesPartitioner();
    }

    @Bean
    public Partitioner partitioner2() {
        FilePartitioner filePartitioner = new FilePartitioner("classpath:input/test2*.csv");
        return filePartitioner.getFilesPartitioner();
    }

    @Bean(name = "partitionHandler1")
    public PartitionHandler partitionHandler1(TaskLauncher taskLauncher,
            JobExplorer jobExplorer, TaskRepository taskRepository) {
        Resource resource = this.resourceLoader.getResource("maven://com.abc:test:1.0-SNAPSHOT");
        DeployerPartitionHandler partitionHandler =
                new DeployerPartitionHandler(taskLauncher, jobExplorer, resource, "ormBusUnitLoaderStep", taskRepository);
        List<String> commandLineArgs = new ArrayList<>(3);
        commandLineArgs.add("--spring.profiles.active=worker");
        commandLineArgs.add("--spring.cloud.task.initialize.enable=false");
        commandLineArgs.add("--spring.batch.initializer.enabled=false");
        partitionHandler.setCommandLineArgsProvider(new PassThroughCommandLineArgsProvider(commandLineArgs));
        SimpleEnvironmentVariablesProvider environmentVariablesProvider = new SimpleEnvironmentVariablesProvider(this.environment);
        partitionHandler.setEnvironmentVariablesProvider(environmentVariablesProvider);
        partitionHandler.setMaxWorkers(3);
        partitionHandler.setApplicationName("Job");
        return partitionHandler;
    }

    @Bean(name = "partitionHandler2")
    //@Scope(value = "prototype")
    public PartitionHandler partitionHandler2(TaskLauncher taskLauncher,
            JobExplorer jobExplorer, TaskRepository taskRepository) {
        Resource resource = this.resourceLoader.getResource("maven://com.abc:test:1.0-SNAPSHOT");
        DeployerPartitionHandler partitionHandler =
                new DeployerPartitionHandler(taskLauncher, jobExplorer, resource, "cvaRmaStep", taskRepository);
        List<String> commandLineArgs = new ArrayList<>(3);
        commandLineArgs.add("--spring.profiles.active=worker");
        commandLineArgs.add("--spring.cloud.task.initialize.enable=false");
        commandLineArgs.add("--spring.batch.initializer.enabled=false");
        partitionHandler.setCommandLineArgsProvider(new PassThroughCommandLineArgsProvider(commandLineArgs));
        SimpleEnvironmentVariablesProvider environmentVariablesProvider = new SimpleEnvironmentVariablesProvider(this.environment);
        partitionHandler.setEnvironmentVariablesProvider(environmentVariablesProvider);
        partitionHandler.setMaxWorkers(3);
        partitionHandler.setApplicationName("CVAJob");
        return partitionHandler;
    }

    @Bean
    @StepScope
    public StepExecutionListner stepExecutionListner() {
        return new StepExecutionListner();
    }
}
Below is the DB config:
@Configuration
public class DbConfig implements BatchConfigurer {

    @ConfigurationProperties(prefix = "spring.datasource")
    @Bean(name = "batchDataSource")
    @Primary
    public DataSource dataSource() {
        return DataSourceBuilder.create().build();
    }

    @Override
    public JobRepository getJobRepository() throws Exception {
        JobRepositoryFactoryBean factoryBean = new JobRepositoryFactoryBean();
        factoryBean.setDatabaseType("ORACLE");
        factoryBean.setDataSource(dataSource());
        factoryBean.setTransactionManager(getTransactionManager());
        factoryBean.setIsolationLevelForCreate("ISOLATION_READ_COMMITTED");
        factoryBean.setTablePrefix("SCHEMA.BATCH_");
        return factoryBean.getObject();
    }

    @Override
    public PlatformTransactionManager getTransactionManager() throws Exception {
        return new DataSourceTransactionManager(dataSource());
    }

    @Override
    public JobLauncher getJobLauncher() throws Exception {
        SimpleJobLauncher jobLauncher = new SimpleJobLauncher();
        jobLauncher.setJobRepository(getJobRepository());
        jobLauncher.afterPropertiesSet();
        return jobLauncher;
    }

    @Override
    public JobExplorer getJobExplorer() throws Exception {
        JobExplorerFactoryBean factory = new JobExplorerFactoryBean();
        factory.setDataSource(dataSource());
        factory.afterPropertiesSet();
        return factory.getObject();
    }

    @Bean
    public TaskConfigurer taskConfigurer(
            @Qualifier("batchDataSource") DataSource batchDataSource) {
        return new DefaultTaskConfigurer(batchDataSource);
    }
}
How can I accomplish my use case using remote partitioning?
I think you can have just a single partitionHandler bean: instead of multiple partitioned steps, each with its own partition handler, you can partition a subflow that contains both steps.
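A minimal sketch of that idea, reusing the beans from the question (the flow and step names here are made up, and reusing partitioner1 is an assumption, since a single partitioner must now drive the whole flow):
// Sketch: wrap both worker steps in one flow, expose the flow as a single step,
// and partition that step with one DeployerPartitionHandler created with the
// step name "workerFlowStep". The master job then starts partitionedStep
// instead of step1/step2.
@Bean
public Flow workerFlow() {
    return new FlowBuilder<SimpleFlow>("workerFlow")
            .start(slaveStep1())
            .next(slaveStep2())
            .build();
}

@Bean
public Step workerFlowStep() {
    return this.stepBuilderFactory.get("workerFlowStep")
            .flow(workerFlow())
            .build();
}

@Bean
@Profile("master")
public Step partitionedStep() {
    return this.stepBuilderFactory.get("partitionedStep")
            .partitioner("workerFlowStep", partitioner1())
            .partitionHandler(partitionHandler1) // the single handler, pointing at "workerFlowStep"
            .build();
}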
I need to start a Spring Batch job from an @Async method, but I get a lazy-initialization exception in the item processor.
This is my approach.
The service that contains the @Async method looks like this:
@Service
@RequiredArgsConstructor
@Slf4j
public class BatchService {

    @Qualifier(value = "attachmentsJob")
    private final Job job;

    @Qualifier(value = "asyncJobLauncher")
    private final JobLauncher jobLauncher;

    private final JobLogService jobLogService;

    @Async
    public void migrateAttachments() {
        JobLogDB jobLogDB = jobLogService.createStartJobLog(job);
        try {
            Map<String, JobParameter> parameters = new HashMap<>();
            parameters.put("jobId", new JobParameter(jobLogDB.getId()));
            jobLauncher.run(job, new JobParameters(parameters));
        } catch (JobExecutionAlreadyRunningException | JobRestartException | JobInstanceAlreadyCompleteException | JobParametersInvalidException e) {
            e.printStackTrace();
            jobLogService.markJobAsFailed(jobLogDB.getId());
        }
    }
}
The Batch Config is:
@Configuration
@RequiredArgsConstructor
@EnableBatchProcessing
public class BatchConfig {

    private final JobBuilderFactory jobBuilderFactory;
    private final JobRepository jobRepository;
    private final StepBuilderFactory stepBuilderFactory;
    private final MessageRepository messageRepository;
    private final PlatformTransactionManager platformTransactionManager;
    private final MessageDbAttachmentsProcessor messageDbAttachmentsProcessor;

    @Bean(name = "asyncBatchTaskExecutor")
    public TaskExecutor taskExecutor() {
        ThreadPoolTaskExecutor executor = new ThreadPoolTaskExecutor();
        executor.setCorePoolSize(64);
        executor.setMaxPoolSize(64);
        executor.setQueueCapacity(64);
        executor.setRejectedExecutionHandler(new ThreadPoolExecutor.CallerRunsPolicy());
        executor.setThreadNamePrefix("MigrateAttachmentsThread-");
        executor.initialize();
        return executor;
    }

    /**
     * Batch Reader
     */
    @Bean
    public RepositoryItemReader<MessageDB> reader() {
        return new RepositoryItemReaderBuilder<MessageDB>().name("readerName")
                .repository(messageRepository)
                .methodName("findAllByTaskIdIsNotNull")
                .pageSize(10)
                .sorts(Collections.singletonMap("taskId", Sort.Direction.ASC))
                .build();
    }

    /**
     * Batch Processor
     */
    @Bean
    public AsyncItemProcessor<MessageDB, MessageDB> processor() {
        AsyncItemProcessor<MessageDB, MessageDB> asyncItemProcessor = new AsyncItemProcessor<>();
        asyncItemProcessor.setDelegate(messageDbAttachmentsProcessor);
        asyncItemProcessor.setTaskExecutor(taskExecutor());
        return asyncItemProcessor;
    }

    /**
     * Batch Writer
     */
    @Bean
    public RepositoryItemWriter<MessageDB> writer() {
        RepositoryItemWriter<MessageDB> repositoryItemWriter = new RepositoryItemWriter<>();
        repositoryItemWriter.setRepository(messageRepository);
        repositoryItemWriter.setMethodName("save");
        return repositoryItemWriter;
    }

    @Bean
    public AsyncItemWriter<MessageDB> asyncWriter() {
        AsyncItemWriter<MessageDB> asyncItemWriter = new AsyncItemWriter<>();
        asyncItemWriter.setDelegate(writer());
        return asyncItemWriter;
    }

    @Bean(name = "attachmentsJob")
    public Job migrateTaskAttachmentsJob(JobCompletionNotificationListener listener, Step step) {
        return jobBuilderFactory.get("taskAttachmentsJob")
                .incrementer(new RunIdIncrementer())
                .listener(listener)
                .flow(step)
                .end()
                .build();
    }

    @Bean
    public Step step() {
        return stepBuilderFactory.get("step")
                .<MessageDB, Future<MessageDB>>chunk(5)
                .reader(reader())
                .processor(processor())
                .writer(asyncWriter())
                .transactionManager(platformTransactionManager)
                .build();
    }

    @Bean(name = "asyncJobLauncher")
    public JobLauncher asyncJobLauncher() throws Exception {
        SimpleJobLauncher jobLauncher = new SimpleJobLauncher();
        jobLauncher.setJobRepository(jobRepository);
        jobLauncher.setTaskExecutor(taskExecutor());
        return jobLauncher;
    }
}
When the method in the MessageDbAttachmentsProcessor is called, I get a LazyInitializationException:
org.hibernate.LazyInitializationException: failed to lazily initialize a collection of role: xxx.xxx.xxx.xxx, could not initialize proxy - no Session
at org.hibernate.collection.internal.AbstractPersistentCollection.throwLazyInitializationException(AbstractPersistentCollection.java:606)
at org.hibernate.collection.internal.AbstractPersistentCollection.withTemporarySessionIfNeeded(AbstractPersistentCollection.java:218)
at org.hibernate.collection.internal.AbstractPersistentCollection.initialize(AbstractPersistentCollection.java:585)
at org.hibernate.collection.internal.AbstractPersistentCollection.write(AbstractPersistentCollection.java:409)
at org.hibernate.collection.internal.PersistentBag.add(PersistentBag.java:407)
I tried to fix it by adding @Transactional(propagation = Propagation.REQUIRES_NEW) on the migrateAttachments method, but without success; after I added it, I got the following exception:
java.lang.IllegalStateException: Existing transaction detected in JobRepository. Please fix this and try again (e.g. remove @Transactional annotations from client).
I do not know what to change or in which direction to go. Any help is welcome.
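A plausible reading of the failure (an assumption, not a confirmed diagnosis from this thread): AsyncItemProcessor runs its delegate on the task executor's threads, outside the Hibernate session that loaded the entity, so any lazy association touched there has no session. One hedged workaround is to open a transaction on the processing thread and re-read the entity there; getAttachments() below is a hypothetical lazy collection standing in for whatever association fails to initialize:
// Hedged sketch: re-attach the entity on the async thread by re-reading it
// inside this method's own transaction. Unlike a @Transactional on the @Async
// service method, this transaction lives on the same thread that touches the
// lazy collection, so a session is available.
@Component
@RequiredArgsConstructor
public class MessageDbAttachmentsProcessor implements ItemProcessor<MessageDB, MessageDB> {

    private final MessageRepository messageRepository;

    @Override
    @Transactional
    public MessageDB process(MessageDB item) {
        MessageDB attached = messageRepository.findById(item.getId())
                .orElseThrow(IllegalStateException::new);
        attached.getAttachments().size(); // force initialization while the session is open
        // ... perform the actual migration work on the attached entity ...
        return attached;
    }
}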
My multithreaded Spring Batch step is behaving almost erratically, and I haven't been able to discern any pattern in the ways it fails. Sometimes it reads and writes too many records from the database, and sometimes it doesn't read enough.
I'm using a RepositoryItemReader to execute a custom native query. I've defined a countQuery for it and used the reader's setMaxItemCount(totalLimit) method, but the reader seems to treat that as more of a suggestion than a hard maximum. With a thread count of 4 and just 1 intentionally bad record that causes 1 skip in the processor logic, I've seen...
limit | pageSize | chunkSize || actual writes
100 | 10 | 5 || 110 unique writes
800 | 100 | 25 || 804 unique writes, and 37 duplicate writes (WHY?)
800 | 100 | 25 || 663 unique writes, and 165 duplicate writes (WHYYYY???)
My project uses Spring Boot 2.1.11.RELEASE, and the version of spring-batch-infrastructure it pulls in is 4.1.3.RELEASE. Does anyone have any idea why Spring Batch performs either too many writes or duplicate writes when just 1 skip occurs on one of the pages?
Maybe it has something to do with the way I've configured my in-memory JobRepository...
Here's my repository class:
@Repository
public interface MyEntityRepository extends JpaRepository<MyEntity, Integer> {

    String FROM_MY_ENTITY_TABLE_LEFT_JOINED_WITH_ANOTHER_TABLE = "from {h-schema}my_entity e " +
            "left join {h-schema}another_table a " +
            "on e.fk = a.pk ";

    @Query(
            value = "select e.id, e.name, a.additional_info " +
                    FROM_MY_ENTITY_TABLE_LEFT_JOINED_WITH_ANOTHER_TABLE +
                    "where e.status <> :status and e.add_date < :date",
            countQuery = "select count(*) " +
                    FROM_MY_ENTITY_TABLE_LEFT_JOINED_WITH_ANOTHER_TABLE +
                    "where e.status <> :status and e.add_date < :date",
            nativeQuery = true)
    Page<MyProjection> findMyProjectionsWithoutStatusBeforeDate(@Param("status") String status,
                                                                @Param("date") Date date,
                                                                Pageable page);
}
And here's how I've configured my job:
@Configuration
public class ConversionBatchJobConfig {

    @Bean
    public SimpleCompletionPolicy processChunkSize(@Value("${commit.chunk.size:5}") Integer chunkSize) {
        return new SimpleCompletionPolicy(chunkSize);
    }

    @Bean
    @StepScope
    public ItemStreamReader<MyProjection> dbReader(
            MyEntityRepository myEntityRepository,
            @Value("#{jobParameters[startTime]}") Date startTime,
            @Value("#{jobParameters[pageSize]}") Integer pageSize,
            @Value("#{jobParameters[limit]}") Integer limit) {
        RepositoryItemReader<MyProjection> myProjectionRepositoryReader = new RepositoryItemReader<>();
        myProjectionRepositoryReader.setRepository(myEntityRepository);
        myProjectionRepositoryReader.setMethodName("findMyProjectionsWithoutStatusBeforeDate");
        myProjectionRepositoryReader.setArguments(new ArrayList<Object>() {{
            add("REMOVED");
            add(startTime);
        }});
        myProjectionRepositoryReader.setSort(new HashMap<String, Sort.Direction>() {{
            put("e.id", Sort.Direction.ASC);
        }});
        myProjectionRepositoryReader.setPageSize(pageSize);
        myProjectionRepositoryReader.setMaxItemCount(limit);
        myProjectionRepositoryReader.setSaveState(false);
        return myProjectionRepositoryReader;
    }

    @Bean
    @StepScope
    public ItemProcessor<MyProjection, JsonMessage> dataConverter(AdditionalDbDataRetrievalService dataRetrievalService) {
        return new MyProjectionToJsonMessageConverter(dataRetrievalService); // <== simple ItemProcessor implementation
    }

    @Bean
    @StepScope
    public ItemWriter<JsonMessage> jsonPublisher(GcpPubsubPublisherService publisherService) {
        return new JsonMessageWriter(publisherService); // <== simple ItemWriter implementation
    }

    @Bean
    public Step conversionProcess(SimpleCompletionPolicy processChunkSize,
                                  ItemStreamReader<MyProjection> dbReader,
                                  ItemProcessor<MyProjection, JsonMessage> dataConverter,
                                  ItemWriter<JsonMessage> jsonPublisher,
                                  StepBuilderFactory stepBuilderFactory,
                                  TaskExecutor conversionThreadPool,
                                  @Value("${conversion.failure.limit:20}") int maximumFailures) {
        return stepBuilderFactory.get("conversionProcess")
                .<MyProjection, JsonMessage>chunk(processChunkSize)
                .reader(dbReader)
                .processor(dataConverter)
                .writer(jsonPublisher)
                .faultTolerant()
                .skipPolicy(new MyCustomConversionSkipPolicy(maximumFailures))
                // ^ for now this returns true for everything until 20 failures
                .listener(new MyConversionSkipListener(processStatus))
                // ^ for now this just logs the error
                .taskExecutor(conversionThreadPool)
                .build();
    }

    @Bean
    public Job conversionJob(Step conversionProcess,
                             JobBuilderFactory jobBuilderFactory) {
        return jobBuilderFactory.get("conversionJob")
                .start(conversionProcess)
                .build();
    }
}
And here's how I've configured my in-memory Job Repository:
@Configuration
@EnableBatchProcessing
public class InMemoryBatchManagementConfig {

    @Bean
    public ResourcelessTransactionManager resourcelessTransactionManager() {
        return new ResourcelessTransactionManager();
    }

    @Bean
    public MapJobRepositoryFactoryBean mapJobRepositoryFactory(ResourcelessTransactionManager resourcelessTransactionManager)
            throws Exception {
        MapJobRepositoryFactoryBean factory = new MapJobRepositoryFactoryBean(resourcelessTransactionManager);
        factory.afterPropertiesSet();
        return factory;
    }

    @Bean
    public JobRepository jobRepository(MapJobRepositoryFactoryBean factory) throws Exception {
        return factory.getObject();
    }

    @Bean
    public SimpleJobLauncher jobLauncher(JobRepository jobRepository) throws Exception {
        SimpleJobLauncher launcher = new SimpleJobLauncher();
        launcher.setJobRepository(jobRepository);
        launcher.afterPropertiesSet();
        return launcher;
    }

    @Bean
    public JobExplorer jobExplorer(MapJobRepositoryFactoryBean factory) {
        return new SimpleJobExplorer(factory.getJobInstanceDao(), factory.getJobExecutionDao(),
                factory.getStepExecutionDao(), factory.getExecutionContextDao());
    }

    @Bean
    public BatchConfigurer batchConfigurer(MapJobRepositoryFactoryBean mapJobRepositoryFactory,
                                           ResourcelessTransactionManager resourcelessTransactionManager,
                                           SimpleJobLauncher jobLauncher,
                                           JobExplorer jobExplorer) {
        return new BatchConfigurer() {
            @Override
            public JobRepository getJobRepository() throws Exception {
                return mapJobRepositoryFactory.getObject();
            }

            @Override
            public PlatformTransactionManager getTransactionManager() throws Exception {
                return resourcelessTransactionManager;
            }

            @Override
            public JobLauncher getJobLauncher() throws Exception {
                return jobLauncher;
            }

            @Override
            public JobExplorer getJobExplorer() throws Exception {
                return jobExplorer;
            }
        };
    }
}
EDIT
I was able to get Spring Batch working with an H2 database instead of the Map-based repository, but I'm still seeing the same issue. Here's how I configured Batch to use H2:
I imported the H2 driver:
<dependency>
    <groupId>com.h2database</groupId>
    <artifactId>h2</artifactId>
    <version>1.4.200</version>
</dependency>
I configured my primary DB config to point to my JPA entities:
@Configuration
@EnableTransactionManagement
@EnableJpaRepositories(basePackages = "com.company.project.jpa.repository", transactionManagerRef = "transactionManager")
@EntityScan(basePackages = "com.company.project.jpa.entity")
public class DbConfig {

    @Bean
    @Primary
    @ConfigurationProperties("oracle.datasource")
    public DataSource dataSource() {
        return DataSourceBuilder.create().build();
    }

    @Bean
    @Primary
    public LocalContainerEntityManagerFactoryBean entityManagerFactory(DataSource dataSource,
            EntityManagerFactoryBuilder builder) {
        return builder.dataSource(dataSource).packages("com.company.project.jpa").build();
    }

    @Bean
    @Primary
    public PlatformTransactionManager transactionManager(
            @Qualifier("entityManagerFactory") LocalContainerEntityManagerFactoryBean entityManagerFactory) {
        return new JpaTransactionManager(entityManagerFactory.getObject());
    }
}
And then I configured my in-memory Batch management like this:
@Configuration
@EnableBatchProcessing
public class InMemoryBatchManagementConfig {

    @Bean(destroyMethod = "shutdown")
    public EmbeddedDatabase h2DataSource() {
        return new EmbeddedDatabaseBuilder().setType(EmbeddedDatabaseType.H2)
                .addScript("classpath:org/springframework/batch/core/schema-drop-h2.sql")
                .addScript("classpath:org/springframework/batch/core/schema-h2.sql")
                .build();
    }

    @Bean
    public LocalContainerEntityManagerFactoryBean h2EntityManagerFactory(EmbeddedDatabase h2DataSource,
            EntityManagerFactoryBuilder builder) {
        return builder.dataSource(h2DataSource).packages("org.springframework.batch.core").build();
    }

    @Bean
    public PlatformTransactionManager h2TransactionManager(
            @Qualifier("h2EntityManagerFactory") LocalContainerEntityManagerFactoryBean h2EntityManagerFactory) {
        return new JpaTransactionManager(h2EntityManagerFactory.getObject());
    }

    @Bean
    public JobRepository jobRepository(EmbeddedDatabase h2DataSource,
            @Qualifier("h2TransactionManager") PlatformTransactionManager h2TransactionManager) throws Exception {
        final JobRepositoryFactoryBean factory = new JobRepositoryFactoryBean();
        factory.setDatabaseType(DatabaseType.H2.getProductName());
        factory.setDataSource(h2DataSource);
        factory.setTransactionManager(h2TransactionManager);
        return factory.getObject();
    }

    @Bean
    public SimpleJobLauncher jobLauncher(JobRepository jobRepository) throws Exception {
        SimpleJobLauncher jobLauncher = new SimpleJobLauncher();
        jobLauncher.setJobRepository(jobRepository);
        jobLauncher.afterPropertiesSet();
        return jobLauncher;
    }

    @Bean
    public JobRepositoryFactoryBean jobRepositoryFactoryBean(EmbeddedDatabase h2DataSource,
            @Qualifier("h2TransactionManager") PlatformTransactionManager h2TransactionManager) {
        JobRepositoryFactoryBean jobRepositoryFactoryBean = new JobRepositoryFactoryBean();
        jobRepositoryFactoryBean.setDataSource(h2DataSource);
        jobRepositoryFactoryBean.setTransactionManager(h2TransactionManager);
        return jobRepositoryFactoryBean;
    }

    @Bean
    public BatchConfigurer batchConfigurer(JobRepository jobRepository,
            SimpleJobLauncher jobLauncher,
            @Qualifier("h2TransactionManager") PlatformTransactionManager h2TransactionManager,
            JobExplorer jobExplorer) {
        return new BatchConfigurer() {
            @Override
            public JobRepository getJobRepository() {
                return jobRepository;
            }

            @Override
            public PlatformTransactionManager getTransactionManager() {
                return h2TransactionManager;
            }

            @Override
            public JobLauncher getJobLauncher() {
                return jobLauncher;
            }

            @Override
            public JobExplorer getJobExplorer() {
                return jobExplorer;
            }
        };
    }
}
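An aside on the reader setup above (a hedged suggestion, not a confirmed diagnosis of the duplicate writes): in steps that are both multi-threaded and fault-tolerant, stateful readers are a common source of over-reads, and Spring Batch ships SynchronizedItemStreamReader to make each read() call atomic across the step's worker threads. A sketch of wrapping the dbReader bean from the question:
// Sketch under the assumption that concurrent access to the reader's paging
// state contributes to the over-reads; the wrapper serializes read() calls.
@Bean
@StepScope
public SynchronizedItemStreamReader<MyProjection> synchronizedDbReader(
        MyEntityRepository myEntityRepository,
        @Value("#{jobParameters[startTime]}") Date startTime,
        @Value("#{jobParameters[pageSize]}") Integer pageSize,
        @Value("#{jobParameters[limit]}") Integer limit) {
    SynchronizedItemStreamReader<MyProjection> synchronizedReader = new SynchronizedItemStreamReader<>();
    synchronizedReader.setDelegate(dbReader(myEntityRepository, startTime, pageSize, limit));
    return synchronizedReader;
}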
I am trying to load data from SQL Server, apply some transformations, and write it to CSV using the Spring Batch scheduler. It all works fine when everything is in the same class.
This is my code:
package com.abc.tools.bootbatch;
@Configuration
@EnableBatchProcessing
public class BatchConfiguration {

    @Autowired
    public JobBuilderFactory jobBuilderFactory;

    @Autowired
    public StepBuilderFactory stepBuilderFactory;

    @Autowired
    public DataSource dataSource;

    private static final String qry = "select top 20 colA, colB, colC from ABC";

    private Resource outputResource = new FileSystemResource("output/outputData.csv");

    @Bean
    public DataSource dataSource() {
        final DriverManagerDataSource dataSource = new DriverManagerDataSource();
        dataSource.setDriverClassName(driver_class);
        dataSource.setUrl("db_url");
        dataSource.setUsername(usr);
        dataSource.setPassword(pwd);
        return dataSource;
    }

    @Bean
    ItemReader<Trade> reader() {
        JdbcCursorItemReader<Trade> databaseReader = new JdbcCursorItemReader<>();
        databaseReader.setDataSource(dataSource);
        databaseReader.setSql(qry);
        databaseReader.setRowMapper(new BeanPropertyRowMapper<>(Trade.class));
        return databaseReader;
    }

    @Bean
    public TradeProcessor processor() {
        return new TradeProcessor();
    }

    @Bean
    public FlatFileItemWriter<Trade> writer() {
        // Create writer instance
        FlatFileItemWriter<Trade> writer = new FlatFileItemWriter<>();
        // Set output file location
        writer.setResource(outputResource);
        // All job repetitions should "append" to the same output file
        writer.setAppendAllowed(true);
        // Name field values sequence based on object properties
        writer.setLineAggregator(new DelimitedLineAggregator<Trade>() {
            {
                setDelimiter(",");
                setFieldExtractor(new BeanWrapperFieldExtractor<Trade>() {
                    {
                        setNames(new String[] { "colA", "colB", "colC" });
                    }
                });
            }
        });
        return writer;
    }

    @Bean
    public Step step1() {
        return stepBuilderFactory.get("step1").<Trade, Trade>chunk(10)
                .reader(reader())
                .processor(processor())
                .writer(writer())
                .build();
    }

    @Bean
    public Job exportUserJob() {
        return jobBuilderFactory.get("exportUserJob")
                .incrementer(new RunIdIncrementer())
                .flow(step1())
                .end()
                .build();
    }
}
When I separate the reading, processing, and writing into different classes, autowiring works fine until I run the batch job; when the job runs, it fails to instantiate the DataSource.
So I removed the autowiring and tried something like this:
@Configuration
@EnableBatchProcessing
public class BatchConfiguration {

    @Autowired
    public JobBuilderFactory jobBuilderFactory;

    @Autowired
    public StepBuilderFactory stepBuilderFactory;

    @Autowired
    public DBConfig dbConfig;

    public DataConnection dataconnection = new DataConnection();

    DataReader reader = new DataReader();
    TradeProcessor processor = new TradeProcessor();
    FlatFileWriter flatFileWriter = new FlatFileWriter();
    DataSource ds = dataconnection.getDataSource(dbConfig);

    @Bean
    public Step step1() {
        return stepBuilderFactory.get("step1").<Trade, Trade>chunk(10)
                .reader(reader.reader(ds))
                .processor(processor.processor())
                .writer(flatFileWriter.writer())
                .build();
    }

    @Bean
    public Job exportUserJob() {
        return jobBuilderFactory.get("exportUserJob")
                .incrementer(new RunIdIncrementer())
                .flow(step1())
                .end()
                .build();
    }
}
This gives "Failed to initialize BatchConfiguration":
org.springframework.beans.factory.BeanCreationException: Error creating bean with name 'batchConfiguration'
I think I am missing something to tie it all together. I am new to Spring; any help is appreciated.
In your first example, you are autowiring a DataSource and declaring a DataSource bean in the same class, which is incorrect. In the second example, instead of autowiring DBConfig, you can import it with @Import(DBConfig.class) and autowire the DataSource in your job configuration as needed. Here is a typical configuration:
@Configuration
public class DBConfig {

    @Bean
    public DataSource dataSource() {
        final DriverManagerDataSource dataSource = new DriverManagerDataSource();
        dataSource.setDriverClassName(driver_class);
        dataSource.setUrl("db_url");
        dataSource.setUsername(usr);
        dataSource.setPassword(pwd);
        return dataSource;
    }
}

@Configuration
@EnableBatchProcessing
@Import(DBConfig.class)
public class BatchConfiguration {

    @Bean
    ItemReader<Trade> reader(DataSource datasource) {
        // use the datasource to configure the reader
    }
}
Since you use Spring Boot, you can remove the DBConfig class, configure the datasource as needed in your application.properties file and the datasource will be automatically injected in your BatchConfiguration.
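For example, a minimal application.properties along those lines might look like this (the URL and credentials are placeholders, not values from the question):
# Placeholder values; Spring Boot builds the DataSource from these properties.
spring.datasource.driver-class-name=com.microsoft.sqlserver.jdbc.SQLServerDriver
spring.datasource.url=jdbc:sqlserver://localhost:1433;databaseName=mydb
spring.datasource.username=usr
spring.datasource.password=pwd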
I'm trying to create a simple backup job and verify that my approach is correct.
I'm using Spring Batch testing to verify the results.
P.S. My batch processing does not use the default configuration provided by the framework, because our job repository has to use a non-default schema name.
The reader step of my job is configured for lazy initialization with the @StepScope annotation; this is needed because the job takes parameters that are used to query the database in the reading step.
This is the sample configuration we're using.
It is in the root package; the rest of the batch configs live in child packages:
@Configuration
@Import({ApplicationHibernateConfiguration.class})
@ComponentScan
public class ApplicationBatchConfiguration extends DefaultBatchConfigurer {

    private static final String BATCH_PROCESSING_PREFIX = "BATCH_PROCESSING.BATCH_";

    private final DataSource dataSource;
    private final PlatformTransactionManager transactionManager;
    private final JobLauncher jobLauncher;
    private final JobRepository jobRepository;
    private final JobExplorer jobExplorer;

    @Autowired
    public ApplicationBatchConfiguration(
            DataSource dataSource, PlatformTransactionManager transactionManager) throws Exception {
        this.dataSource = dataSource;
        this.transactionManager = transactionManager;
        this.jobRepository = createJobRepository();
        this.jobLauncher = createJobLauncher();
        this.jobExplorer = createJobExplorer();
    }

    @Override
    protected JobLauncher createJobLauncher() throws Exception {
        SimpleJobLauncher jobLauncher = new SimpleJobLauncher();
        jobLauncher.setJobRepository(jobRepository);
        jobLauncher.setTaskExecutor(new SimpleAsyncTaskExecutor());
        jobLauncher.afterPropertiesSet();
        return jobLauncher;
    }

    @Override
    protected JobRepository createJobRepository() throws Exception {
        JobRepositoryFactoryBean factoryBean = new JobRepositoryFactoryBean();
        factoryBean.setDatabaseType("DB2");
        factoryBean.setTablePrefix(BATCH_PROCESSING_PREFIX);
        factoryBean.setIsolationLevelForCreate("ISOLATION_REPEATABLE_READ");
        factoryBean.setDataSource(this.dataSource);
        factoryBean.setTransactionManager(this.transactionManager);
        factoryBean.afterPropertiesSet();
        return factoryBean.getObject();
    }

    @Override
    protected JobExplorer createJobExplorer() throws Exception {
        JobExplorerFactoryBean factoryBean = new JobExplorerFactoryBean();
        factoryBean.setDataSource(this.dataSource);
        factoryBean.setTablePrefix(BATCH_PROCESSING_PREFIX);
        factoryBean.afterPropertiesSet();
        return factoryBean.getObject();
    }

    @Override
    @Bean
    public JobRepository getJobRepository() {
        return jobRepository;
    }

    @Override
    public PlatformTransactionManager getTransactionManager() {
        return transactionManager;
    }

    @Override
    @Bean
    public JobLauncher getJobLauncher() {
        return jobLauncher;
    }

    @Override
    @Bean
    public JobExplorer getJobExplorer() {
        return jobExplorer;
    }

    @Bean
    public JobBuilderFactory jobBuilderFactory(JobRepository jobRepository) {
        return new JobBuilderFactory(jobRepository);
    }

    @Bean
    public StepBuilderFactory stepBuilderFactory(
            JobRepository jobRepository, PlatformTransactionManager transactionManager) {
        return new StepBuilderFactory(jobRepository, transactionManager);
    }
}
The step that I'm trying to test looks like this:
@Bean
@StepScope
public JdbcPagingItemReader<DomainObject> itemReader(
        @Value("#{jobParameters['id']}") String id) {
    JdbcPagingItemReader<DomainObject> reader = new JdbcPagingItemReader<>();
    reader.setDataSource(this.dataSource);
    reader.setFetchSize(10);
    Db2PagingQueryProvider nativeQueryProvider = new Db2PagingQueryProvider();
    nativeQueryProvider.setSelectClause("*");
    nativeQueryProvider.setFromClause("from SCHEMA.DOMAIN");
    nativeQueryProvider.setWhereClause("id = :id");
    Map<String, Object> params = new HashMap<>(1);
    params.put("id", id);
    reader.setRowMapper((rs, rowNum) -> {
        DomainObject element = new DomainObject();
        element.setId(rs.getString("ID"));
        return element;
    });
    reader.setParameterValues(params);
    reader.setQueryProvider(nativeQueryProvider);
    return reader;
}

@Bean
public Step fetchDomain() throws Exception {
    return stepBuilderFactory.get("fetchDomain")
            .<DomainObject, DomainObject>chunk(10)
            .faultTolerant()
            .reader(itemReader(null))
            .writer(items -> items.forEach(System.out::println))
            .build();
}
The actual job bean is currently configured to launch only a single step:
@Bean
public Job backupJob() throws Exception {
    return jobBuilderFactory.get("backupJob")
            .start(fetchDomain())
            .build();
}
My test code looks like the following:
@RunWith(SpringRunner.class)
@SpringBatchTest
@ContextConfiguration(classes = {ApplicationBatchConfiguration.class})
public class BackupJobConfigurationTest {

    @Autowired
    @Qualifier(value = "backupJob")
    public Job job;

    @Autowired
    private JobLauncherTestUtils jobLauncherTestUtils;

    @Test
    public void flowTest() throws Exception {
        JobParameters parameters = new JobParametersBuilder()
                .addString("id", "124")
                .toJobParameters();
        JobExecution execution = jobLauncherTestUtils.launchJob(parameters);
        assertEquals(BatchStatuses.COMPLETED, execution.getExitStatus().getExitCode()); // fails
    }
}
I expect the exit code to be "COMPLETED" but get "UNKNOWN". I'm also not sure the code is even being invoked, because I don't see any output from the writer lambda.
The only output I see in the test is:
Aug 30, 2019 2:52:17 PM org.springframework.batch.core.launch.support.SimpleJobLauncher run
INFO: Job: [FlowJob: [name=backupJob]] launched with the following parameters: [{id=124}]
org.junit.ComparisonFailure:
Expected :COMPLETED
Actual :UNKNOWN
I figured it out. First of all, I needed to remove the SimpleAsyncTaskExecutor from my configuration so that the job actually runs in the same thread as the test. Then, after a more careful read of the reference documentation, I reconfigured my batch config: instead of extending DefaultBatchConfigurer directly, I configured it as a bean inside my configuration and added the @EnableBatchProcessing annotation:
@Bean
public BatchConfigurer batchConfigurer() {
    return new DefaultBatchConfigurer() {
        @Override
        protected JobRepository createJobRepository() throws Exception {
            JobRepositoryFactoryBean factoryBean = new JobRepositoryFactoryBean();
            factoryBean.setDatabaseType("db2");
            factoryBean.setTablePrefix(BATCH_PROCESSING_PREFIX);
            factoryBean.setIsolationLevelForCreate("ISOLATION_REPEATABLE_READ");
            factoryBean.setDataSource(dataSource);
            factoryBean.setTransactionManager(transactionManager);
            factoryBean.afterPropertiesSet();
            return factoryBean.getObject();
        }

        @Override
        protected JobExplorer createJobExplorer() throws Exception {
            JobExplorerFactoryBean factoryBean = new JobExplorerFactoryBean();
            factoryBean.setDataSource(dataSource);
            factoryBean.setTablePrefix(BATCH_PROCESSING_PREFIX);
            factoryBean.afterPropertiesSet();
            return factoryBean.getObject();
        }

        @Override
        protected JobLauncher createJobLauncher() throws Exception {
            SimpleJobLauncher jobLauncher = new SimpleJobLauncher();
            jobLauncher.setJobRepository(getJobRepository());
            // jobLauncher.setTaskExecutor(new SimpleAsyncTaskExecutor());
            jobLauncher.afterPropertiesSet();
            return jobLauncher;
        }
    };
}
I created a Spring Batch application with Java configuration. I have a main method and a class that represents a job.
@ComponentScan
@EnableAutoConfiguration
public class App {

    public static void main(String[] args) {
        System.out.println("Starting Spring Batch Execution -------");
        SpringApplication.run(App.class, args);
    }
}

@Configuration
@EnableBatchProcessing
public class FlatFileJob {

    @Autowired
    private JobBuilderFactory jobs;

    @Autowired
    private StepBuilderFactory steps;

    /**
     * Create and configure job
     * @return
     */
    @Bean(name = "Read RabbitMQ")
    public Job addFlatFileJob() {
        return jobs.get("carJob")
                .start(this.flatFileStep())
                .build();
    }

    /**
     * Create and configure the only step
     * @return
     */
    @Bean
    public Step flatFileStep() {
        return steps.get("step")
                .<Car, Car>chunk(3)
                .reader(new CarItemReader())
                .processor(new CarItemProcessor())
                .writer(new CarItemWriter())
                .build();
    }

    @Bean
    public PlatformTransactionManager transactionManager() {
        return new ResourcelessTransactionManager();
    }

    @Bean
    public JobRepository jobRepository() throws Exception {
        JobRepository jobRepository = (JobRepository) new JobRepositoryFactoryBean();
        return jobRepository;
    }

    @Bean
    public JdbcTemplate jdbcTemplate(DataSource dataSource) {
        return new JdbcTemplate(dataSource);
    }

    @Bean
    public DataSource getDataSource() {
        BasicDataSource dataSource = new BasicDataSource();
        dataSource.setDriverClassName("org.postgresql.Driver");
        dataSource.setUrl("jdbc:postgresql://127.0.0.1:5432/spring_batch");
        dataSource.setUsername("xxx");
        dataSource.setPassword("xxx");
        return dataSource;
    }
}
When I execute this, I get the following exception:
Caused by: java.lang.IllegalArgumentException: DataSource must not be null.
at org.springframework.util.Assert.notNull(Assert.java:112)
at org.springframework.batch.core.repository.support.JobRepositoryFactoryBean.afterPropertiesSet(JobRepositoryFactoryBean.java:171)
at org.springframework.batch.core.repository.support.AbstractJobRepositoryFactoryBean.getObject(AbstractJobRepositoryFactoryBean.java:202)
at neoway.com.job.FlatFileJob.jobRepository(FlatFileJob.java:88)
at neoway.com.job.FlatFileJob$$EnhancerBySpringCGLIB$$7ec3c4f6.CGLIB$jobRepository$0(<generated>)
at neoway.com.job.FlatFileJob$$EnhancerBySpringCGLIB$$7ec3c4f6$$FastClassBySpringCGLIB$$990caa45.invoke(<generated>)
at org.springframework.cglib.proxy.MethodProxy.invokeSuper(MethodProxy.java:228)
at org.springframework.context.annotation.ConfigurationClassEnhancer$BeanMethodInterceptor.intercept(ConfigurationClassEnhancer.java:312)
at neoway.com.job.FlatFileJob$$EnhancerBySpringCGLIB$$7ec3c4f6.jobRepository(<generated>)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source)
at java.lang.reflect.Method.invoke(Unknown Source)
at org.springframework.beans.factory.support.SimpleInstantiationStrategy.instantiate(SimpleInstantiationStrategy.java:166)
I configured everything in Java, including the DataSource. I don't know why Spring doesn't pick up the DataSource configuration. What's the problem?
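A hedged reading of the stack trace above (an assumption, not a confirmed answer): JobRepositoryFactoryBean.afterPropertiesSet() fails because the factory bean is never given a DataSource, and the factory itself is cast rather than asked for its product. A sketch of what the jobRepository() bean was presumably meant to do, under that assumption:
// Sketch, assuming the intent was to build the repository from the DataSource
// and transaction manager beans defined above in FlatFileJob.
@Bean
public JobRepository jobRepository() throws Exception {
    JobRepositoryFactoryBean factory = new JobRepositoryFactoryBean();
    factory.setDataSource(getDataSource());            // the factory requires a DataSource
    factory.setTransactionManager(transactionManager());
    factory.afterPropertiesSet();
    return factory.getObject();                        // ask the factory for the repository; don't cast the factory
}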