I'm trying to understand why I have a native query that loads more data than a JPA one, yet the native query is much faster.
Here is my JPA query:
#NamedEntityGraph(
name = "ShipmentTbl.report.scheduling-delivery",
attributeNodes = {
#NamedAttributeNode("boxes"),
#NamedAttributeNode("pallets"),
#NamedAttributeNode("shipFrom"),
#NamedAttributeNode("shipmentReceive"),
#NamedAttributeNode("shipmentPurchaseOrders"),
#NamedAttributeNode("shipmentProducts"),
#NamedAttributeNode("distributionCenter"),
#NamedAttributeNode("scheduledDates")
}
)
.......
#EntityGraph(value = "ShipmentTbl.report.scheduling-delivery", type = EntityGraph.EntityGraphType.LOAD)
#Query("SELECT s FROM ShipmentTbl s " +
"JOIN s.shipmentReceive sr " +
"WHERE (sr.receivedDatetime BETWEEN :startDate AND :endDate) " +
"AND (:includeArchivedShipments = TRUE OR s.status <> 12)")
List<ShipmentTbl> getDeliveryReportData(#Param(value = "startDate") Timestamp startDate,
#Param(value = "endDate") Timestamp endDate,
#Param(value = "includeArchivedShipments") boolean includeArchivedShipments);
And here is my native query:
SELECT
s.id,
s.shipment_name,
max(dc.name) as dc,
max(sf.country) as country,
s.status,
s.estimated_shipment_date,
string_agg(CAST(ssd.scheduled_shipment_date as TEXT), ',') as scheduled_shipment_date,
string_agg(CAST(ssd.scheduled_period_of_day as TEXT), ',') as scheduled_period_of_day,
s.courier,
s.courier_amount,
s.courier_currency,
s.total_volume,
s.measurement_unit,
s.total_weight,
s.weight_unit,
s.tracking_number,
s.auth_code,
s.booked_in_by_user,
s.tenant,
(SELECT stm.date FROM status_transition_message stm WHERE (stm.initial_status = 0 AND stm.final_status = 1 OR stm.initial_status = 0 AND stm.final_status = 7) AND stm.shipment_id = s.id ORDER BY date ASC LIMIT 1) AS submitted_dated,
(SELECT stm.user_full_name FROM status_transition_message stm WHERE (stm.initial_status = 0 AND stm.final_status = 1 OR stm.initial_status = 0 AND stm.final_status = 7) AND stm.shipment_id = s.id ORDER BY date ASC LIMIT 1) AS submitted_by,
(SELECT stm.date FROM status_transition_message stm WHERE stm.initial_status = 1 AND stm.final_status = 3 AND stm.shipment_id = s.id ORDER BY date ASC LIMIT 1) AS first_time_in_progress,
(SELECT stm.date FROM status_transition_message stm WHERE stm.initial_status = 3 AND stm.final_status = 4 AND stm.shipment_id = s.id ORDER BY date ASC LIMIT 1) AS approved_date,
(SELECT stm.user_full_name FROM status_transition_message stm WHERE stm.initial_status = 3 AND stm.final_status = 4 AND stm.shipment_id = s.id ORDER BY date ASC LIMIT 1) AS tt_user,
(SELECT stm.user_full_name FROM status_transition_message stm WHERE stm.initial_status = 4 AND (stm.final_status = 10 OR stm.final_status = 11) AND stm.shipment_id = s.id ORDER BY date ASC LIMIT 1) AS received_by,
(SELECT string_agg(CAST(purchase_order_id AS TEXT), ',') FROM (SELECT spo.purchase_order_id FROM shipment_purchase_orders spo WHERE spo.shipment_id = s.id) AS purchase_orders) AS purchase_orders,
(SELECT string_agg(to_char(bookin_datetime, 'YYYY-MM-DD HH24:MI:SS.US'), ',') FROM (SELECT sr.bookin_datetime FROM shipment_receive sr WHERE sr.shipment_id = s.id) AS bd) AS bookin_datetimes,
(SELECT string_agg(to_char(received_datetime, 'YYYY-MM-DD HH24:MI:SS.US'), ',') FROM (SELECT sr.received_datetime FROM shipment_receive sr WHERE sr.shipment_id = s.id) AS dd) AS received_datetimes,
(SELECT string_agg(CAST(file_id AS TEXT), ',') FROM (SELECT spd.file_id FROM shipment_packing_documents spd WHERE spd.shipment_id = s.id) AS packing_docs) AS packing_doc_ids,
(SELECT string_agg(CAST(file_id AS TEXT), ',') FROM (SELECT std.file_id FROM shipment_tech_documents std WHERE std.shipment_id = s.id) AS tech_docs) AS tech_doc_ids,
(SELECT string_agg(CAST(number AS TEXT), ',') FROM (SELECT scd.number FROM shipment_customs_documents scd WHERE scd.shipment_id = s.id) AS customs_numbers) AS customs_numbers,
(SELECT string_agg(CAST(file_id AS TEXT), ',') FROM (SELECT file_id FROM shipment_invoices si WHERE si.shipment_id = s.id) AS invoice_docs) AS invoice_doc_ids,
(SELECT string_agg(CAST(number AS TEXT), ', ') FROM (SELECT si.number FROM shipment_invoices si WHERE si.shipment_id = s.id) AS invoice_numbers) AS invoice_numbers,
(SELECT sum(si.amount) FROM shipment_invoices si WHERE si.shipment_id = s.id) AS invoice_amount,
(SELECT sum(p.pallets_number) FROM pallets p WHERE p.shipment_id = s.id) AS pallets_number,
(SELECT sum(p.pallets_number * p.boxes_number) FROM pallets p WHERE p.shipment_id = s.id) AS boxes_by_pallet,
(SELECT sum(b.boxes_number) FROM boxes b WHERE b.shipment_id = s.id) AS boxes,
(SELECT string_agg(CAST(products AS TEXT), ',') FROM (SELECT sp.stock_order_id, sp.shipping_quantity FROM shipment_products sp WHERE sp.shipment_id = s.id) AS products) AS products,
(SELECT string_agg(DISTINCT CAST(status AS TEXT), ',') FROM products_status WHERE shipment_id = s.id) AS product_status,
s.tt_priority
(SELECT stm.date FROM status_transition_message stm WHERE (stm.initial_status = 0 AND stm.final_status = 1 OR stm.initial_status = 7 AND stm.final_status = 1) AND stm.shipment_id = s.id ORDER BY date ASC LIMIT 1) AS first_time_in_waiting_authorization,
" s.submitted_date as last_submitted_date
"FROM shipment_receive sr
JOIN shipment s ON sr.shipment_id = s.id
LEFT JOIN shipment_scheduled_date ssd on ssd.shipment_id = s.id
JOIN distribution_center dc ON s.ship_to = dc.id
JOIN address sf ON s.ship_from = sf.id
WHERE sr.received_datetime BETWEEN :startDate AND :endDate
AND (:includeArchivedShipments = TRUE OR s.status <> 12)
GROUP BY s.id";
I want to note these two queries are not equivalent, but they do load a lot of common fields and have the same conditions in the WHERE clauses.
I know the 2nd query is long and boring to look at, but the gist is, my entities are all lazy loaded. There's not a single eagerly loaded field. So, in the JPA query, the fields in the EntityGraph are the only fields eagerly loaded. Despite that, there are less joins happening in my first query (JPA) than in my second one, which joins the same tables as the first one and more. Yet, the second query takes an average of ~11 seconds to run whereas the first one takes ~20 seconds, both with the exact same parameters.
Can someone explain why? I know the first ends up loading more "normal" fields that are in the entities (like Strings, Dates, Integers...), but I'd still expect it to run faster.
(If it's relevant enough, I can also post the native SQL query Hibernate builds using the JPA query)
EDIT: This is the SQL query generated by Hibernate from the initial JPA query:
select
shipmenttb0_.id as id1_13_0_,
shipmentre1_.id as id1_22_1_,
boxes2_.id as id1_1_2_,
shipmentpu3_.id as id1_21_3_,
addresstbl4_.id as id1_0_4_,
scheduledd5_.id as id1_23_5_,
pallets6_.id as id1_7_6_,
shipmentpr7_.id as id1_20_7_,
distributi8_.id as id1_2_8_,
shipmenttb0_.archived_date as archived2_13_0_,
shipmenttb0_.auth_code as auth_cod3_13_0_,
shipmenttb0_.authorization_date as authoriz4_13_0_,
shipmenttb0_.booked_in_by_user as booked_i5_13_0_,
shipmenttb0_.business_channel as business6_13_0_,
shipmenttb0_.courier as courier7_13_0_,
shipmenttb0_.courier_amount as courier_8_13_0_,
shipmenttb0_.courier_currency as courier_9_13_0_,
shipmenttb0_.ship_to as ship_to40_13_0_,
shipmenttb0_.estimated_shipment_date as estimat10_13_0_,
shipmenttb0_.forecast_scheduled_date as forecas11_13_0_,
shipmenttb0_.last_updated_date as last_up12_13_0_,
shipmenttb0_.measurement_unit as measure13_13_0_,
shipmenttb0_.original_submitted_date as origina14_13_0_,
shipmenttb0_.packaging_type as packagi15_13_0_,
shipmenttb0_.placeholder_message as placeho16_13_0_,
shipmenttb0_.scheduled_period_of_day as schedul17_13_0_,
shipmenttb0_.scheduled_shipment_date as schedul18_13_0_,
shipmenttb0_.ship_from as ship_fr41_13_0_,
shipmenttb0_.ship_origin as ship_or42_13_0_,
shipmenttb0_.shipment_name as shipmen19_13_0_,
shipmenttb0_.status as status20_13_0_,
shipmenttb0_.submitted_date as submitt21_13_0_,
shipmenttb0_.supplier_contact_email as supplie22_13_0_,
shipmenttb0_.supplier_contact_name as supplie23_13_0_,
shipmenttb0_.supplier_contact_phone_number as supplie24_13_0_,
shipmenttb0_.supplier_email as supplie25_13_0_,
shipmenttb0_.supplier_secondary_contact_email as supplie26_13_0_,
shipmenttb0_.supplier_secondary_contact_name as supplie27_13_0_,
shipmenttb0_.supplier_secondary_contact_phone_number as supplie28_13_0_,
shipmenttb0_.tenant as tenant29_13_0_,
shipmenttb0_.total_received_boxes as total_r30_13_0_,
shipmenttb0_.total_units as total_u31_13_0_,
shipmenttb0_.total_value as total_v32_13_0_,
shipmenttb0_.total_volume as total_v33_13_0_,
shipmenttb0_.total_weight as total_w34_13_0_,
shipmenttb0_.tracking_number as trackin35_13_0_,
shipmenttb0_.tt_note as tt_note36_13_0_,
shipmenttb0_.tt_priority as tt_prio37_13_0_,
shipmenttb0_.updated_by_user as updated38_13_0_,
shipmenttb0_.weight_unit as weight_39_13_0_,
shipmentre1_.bookin_datetime as bookin_d2_22_1_,
shipmentre1_.received_type as received3_22_1_,
shipmentre1_.delay_days as delay_da4_22_1_,
shipmentre1_.received_boxes as received5_22_1_,
shipmentre1_.received_datetime as received6_22_1_,
shipmentre1_.shipment_scheduled_date as shipment9_22_1_,
shipmentre1_.session_active as session_7_22_1_,
shipmentre1_.shipment_id as shipmen10_22_1_,
shipmentre1_.status as status8_22_1_,
shipmentre1_.shipment_id as shipmen10_22_0__,
shipmentre1_.id as id1_22_0__,
boxes2_.added_time as added_ti2_1_2_,
boxes2_.boxes_number as boxes_nu3_1_2_,
boxes2_.height as height4_1_2_,
boxes2_.length as length5_1_2_,
boxes2_.shipment_id as shipment7_1_2_,
boxes2_.width as width6_1_2_,
boxes2_.shipment_id as shipment7_1_1__,
boxes2_.id as id1_1_1__,
shipmentpu3_.purchase_order_id as purchase2_21_3_,
shipmentpu3_.shipment_id as shipment3_21_3_,
shipmentpu3_.shipment_id as shipment3_21_2__,
shipmentpu3_.id as id1_21_2__,
addresstbl4_.address_line1 as address_2_0_4_,
addresstbl4_.address_line2 as address_3_0_4_,
addresstbl4_.address_line3 as address_4_0_4_,
addresstbl4_.address_line4 as address_5_0_4_,
addresstbl4_.city as city6_0_4_,
addresstbl4_.company_name as company_7_0_4_,
addresstbl4_.contact_email as contact_8_0_4_,
addresstbl4_.contact_name as contact_9_0_4_,
addresstbl4_.contact_phone as contact10_0_4_,
addresstbl4_.country as country11_0_4_,
addresstbl4_.postal_code as postal_12_0_4_,
addresstbl4_.time_zone_id as time_zo13_0_4_,
addresstbl4_.title as title14_0_4_,
scheduledd5_.estimated_product_item_units as estimate2_23_5_,
scheduledd5_.expected_boxes as expected3_23_5_,
scheduledd5_.expected_volume as expected4_23_5_,
scheduledd5_.scheduled_period_of_day as schedule5_23_5_,
scheduledd5_.scheduled_shipment_date as schedule6_23_5_,
scheduledd5_.shipment_id as shipment7_23_5_,
scheduledd5_.shipment_id as shipment7_23_3__,
scheduledd5_.id as id1_23_3__,
pallets6_.added_time as added_ti2_7_6_,
pallets6_.boxes_number as boxes_nu3_7_6_,
pallets6_.height as height4_7_6_,
pallets6_.length as length5_7_6_,
pallets6_.pallets_number as pallets_6_7_6_,
pallets6_.shipment_id as shipment8_7_6_,
pallets6_.width as width7_7_6_,
pallets6_.shipment_id as shipment8_7_4__,
pallets6_.id as id1_7_4__,
shipmentpr7_.country_of_origin as country_2_20_7_,
shipmentpr7_.hs_code as hs_code3_20_7_,
shipmentpr7_.is_new as is_new4_20_7_,
shipmentpr7_.manufacturer_id as manufact5_20_7_,
shipmentpr7_.note as note6_20_7_,
shipmentpr7_.preferential_origin as preferen7_20_7_,
shipmentpr7_.shipment_id as shipmen10_20_7_,
shipmentpr7_.shipping_quantity as shipping8_20_7_,
shipmentpr7_.stock_order_id as stock_or9_20_7_,
(select
1) as formula0_7_,
shipmentpr7_.shipment_id as shipmen10_20_5__,
shipmentpr7_.id as id1_20_5__,
distributi8_.address_id as address_9_2_8_,
distributi8_.label as label2_2_8_,
distributi8_.manufacturer_id_required as manufact3_2_8_,
distributi8_.max_product_items_units as max_prod4_2_8_,
distributi8_.max_products_units as max_prod5_2_8_,
distributi8_.max_volume as max_volu6_2_8_,
distributi8_.name as name7_2_8_,
distributi8_.volume_unit as volume_u8_2_8_
from
shipment shipmenttb0_
inner join
shipment_receive shipmentre1_
on shipmenttb0_.id=shipmentre1_.shipment_id
left outer join
boxes boxes2_
on shipmenttb0_.id=boxes2_.shipment_id
left outer join
shipment_purchase_orders shipmentpu3_
on shipmenttb0_.id=shipmentpu3_.shipment_id
left outer join
address addresstbl4_
on shipmenttb0_.ship_from=addresstbl4_.id
left outer join
shipment_scheduled_date scheduledd5_
on shipmenttb0_.id=scheduledd5_.shipment_id
left outer join
pallets pallets6_
on shipmenttb0_.id=pallets6_.shipment_id
left outer join
shipment_products shipmentpr7_
on shipmenttb0_.id=shipmentpr7_.shipment_id
left outer join
distribution_center distributi8_
on shipmenttb0_.ship_to=distributi8_.id
where
(
shipmentre1_.received_datetime between ? and ?
)
and (
?=true
or shipmenttb0_.status<>12
)
Like I said, it does a couple of joins, but nothing more than the other native query, so it should be faster.
Most likely your JPA query is not running a single query like you are expecting. I know you said you think you have everything LAZY loaded, but you really need to turn on SQL logging to figure this out.
Whatever your logging platform is, change this logger to DEBUG
org.hibernate.SQL: DEBUG
I'm trying to write an SQL query using CriteriaQuery, but I'm having a hard time doing so. This query basically gets a list of shipments and sorts them by their authorization date. This authorization date is represented as the date attribute of the first record in the status transition messages table with an initial status of 3 and a final status of 4. This is my query:
SELECT s.id
FROM shipment s
ORDER BY (SELECT min(stm.date)
FROM status_transition_message stm
WHERE stm.initial_status = 1 AND stm.final_status = 3 AND stm.shipment_id = s.id) desc;
I've tried multiple different solutions, but none have worked so far.
My current iteration is as follows:
private void sortByAuthDate(Root<ShipmentTbl> root, CriteriaQuery<?> query, CriteriaBuilder builder, ListSort sort) {
Subquery<Timestamp> authDateQuery = query.subquery(Timestamp.class);
Root<StatusTransitionMessageTbl> stmRoot = authDateQuery.from(StatusTransitionMessageTbl.class);
Predicate shipmentId = builder.equal(stmRoot.<ShipmentTbl>get("shipment").<String>get("id"), root.<String>get("id"));
Predicate initialStatus = builder.equal(stmRoot.<Integer>get("initialStatus"), 3);
Predicate finalStatus = builder.equal(stmRoot.<Integer>get("finalStatus"), 4);
// returns the authorization date for each queried shipment
authDateQuery.select(builder.least(stmRoot.<Timestamp>get("date")))
.where(builder.and(shipmentId, initialStatus, finalStatus));
Expression<Timestamp> authDate = authDateQuery.getSelection();
Order o = sort.getSortDirection() == ListSort.SortDirection.ASC ? builder.asc(authDate) : builder.desc(authDate);
query.multiselect(authDate).orderBy(o);
}
The problem with this solution is that the SQL query generated by the CriteriaQuery does not support subqueries in the ORDER BY clause, causing a parsing exception.
My CriteriaQuery-fu is not good enough to help you with that part, but you could rewrite your SQL query to this:
SELECT s.id
FROM shipment s
LEFT JOIN status_transition_message stm
ON stm.initial_status = 1 AND stm.final_status = 3 AND stm.shipment_id = s.id
GROUP BY s.id
ORDER BY min(stm.date) DESC;
To me, this quite likely seems to be a faster solution anyway than running a correlated subquery in the ORDER BY clause, especially on RDBMS with less sophisticated optimisers.
So I attempted to follow #Lukas Eder solution and reached this solution:
private void sortByAuthDate(Root<ShipmentTbl> root, CriteriaQuery<?> query, CriteriaBuilder builder, ShipmentListSort sort) {
Join<ShipmentTbl, StatusTransitionMessageTbl> shipmentStatuses = root.join("shipmentStatus", JoinType.LEFT);
Predicate initialStatus = builder.equal(shipmentStatuses.<Integer>get("initialStatus"), 1);
Predicate finalStatus = builder.equal(shipmentStatuses.<Integer>get("finalStatus"), 3);
Expression<Timestamp> authDate = builder.least(shipmentStatuses.<Timestamp>get("date"));
Order o = sort.getSortDirection() == ShipmentListSort.SortDirection.ASC ? builder.asc(authDate) : builder.desc(authDate);
shipmentStatuses.on(builder.and(initialStatus, finalStatus));
query.multiselect(authDate).groupBy(root.<String>get("id")).orderBy(o);
}
}
But now it's throwing this exception:
ERROR o.h.e.jdbc.spi.SqlExceptionHelper - ERROR: for SELECT DISTINCT, ORDER BY expressions must appear in select list
This happens because the query is only going to get distinct shipments later on and it's asking for the sorting column also appear in the select. The problem is I don't know how to force CriteriaQuery to keep that column in the SELECT statement. It automatically only puts in the ORDER BY.
Here's the JPQL query it's executing in my test:
select
distinct generatedAlias0
from
ShipmentTbl as generatedAlias0
left join
generatedAlias0.shipmentStatus as generatedAlias1 with ( generatedAlias1.initialStatus=:param0 )
and (
generatedAlias1.finalStatus=:param1
)
where
lower(generatedAlias0.shipmentName) like :param2
group by
generatedAlias0.id
order by
min(generatedAlias1.date) desc
and the generated SQL query:
select
distinct shipmenttb0_.id as id1_13_,
shipmenttb0_.archived_date as archived2_13_,
shipmenttb0_.auth_code as auth_cod3_13_,
shipmenttb0_.authorization_date as authoriz4_13_,
shipmenttb0_.booked_in_by_user as booked_i5_13_,
shipmenttb0_.business_channel as business6_13_,
shipmenttb0_.courier as courier7_13_,
shipmenttb0_.courier_amount as courier_8_13_,
shipmenttb0_.courier_currency as courier_9_13_,
shipmenttb0_.ship_to as ship_to39_13_,
shipmenttb0_.estimated_shipment_date as estimat10_13_,
shipmenttb0_.last_updated_date as last_up11_13_,
shipmenttb0_.measurement_unit as measure12_13_,
shipmenttb0_.original_submitted_date as origina13_13_,
shipmenttb0_.packaging_type as packagi14_13_,
shipmenttb0_.placeholder_message as placeho15_13_,
shipmenttb0_.scheduled_period_of_day as schedul16_13_,
shipmenttb0_.scheduled_shipment_date as schedul17_13_,
shipmenttb0_.ship_from as ship_fr40_13_,
shipmenttb0_.ship_origin as ship_or41_13_,
shipmenttb0_.shipment_name as shipmen18_13_,
shipmenttb0_.status as status19_13_,
shipmenttb0_.submitted_date as submitt20_13_,
shipmenttb0_.supplier_contact_email as supplie21_13_,
shipmenttb0_.supplier_contact_name as supplie22_13_,
shipmenttb0_.supplier_contact_phone_number as supplie23_13_,
shipmenttb0_.supplier_email as supplie24_13_,
shipmenttb0_.supplier_secondary_contact_email as supplie25_13_,
shipmenttb0_.supplier_secondary_contact_name as supplie26_13_,
shipmenttb0_.supplier_secondary_contact_phone_number as supplie27_13_,
shipmenttb0_.tenant as tenant28_13_,
shipmenttb0_.total_received_boxes as total_r29_13_,
shipmenttb0_.total_units as total_u30_13_,
shipmenttb0_.total_value as total_v31_13_,
shipmenttb0_.total_volume as total_v32_13_,
shipmenttb0_.total_weight as total_w33_13_,
shipmenttb0_.tracking_number as trackin34_13_,
shipmenttb0_.tt_note as tt_note35_13_,
shipmenttb0_.tt_priority as tt_prio36_13_,
shipmenttb0_.updated_by_user as updated37_13_,
shipmenttb0_.weight_unit as weight_38_13_
from
shipment shipmenttb0_
left outer join
status_transition_message shipmentst1_
on shipmenttb0_.id=shipmentst1_.shipment_id
and (
shipmentst1_.initial_status=?
and shipmentst1_.final_status=?
)
where
lower(shipmenttb0_.shipment_name) like ?
group by
shipmenttb0_.id
order by
min(shipmentst1_.date) desc limit ?