insert into DB table java - java

I want to fill a table using insert statement. To get the data I used arrays that contain the data I need.
I want to insert the id and some other values in this format.
id w0 w1 w2 w3
1 0.0 1.0 0.54 0.0
2 1.0 0.5 0.0 0.8
and others
id ---> read from a normal array
the other numbers ---> read from a 2d array
and I want for each new row a new id.
The problem I have is the rows are moving and incrementing while the id is stuck in 0, so each new row is added with the same id which is wrong since the id is a primary key.
// --- Question's code: add one "w<k_id>" column per keyword, then insert matrix rows ---
// NOTE(review): this reproduces the reported bug. The while(rss.next()) nested inside
// the for loop drains the WHOLE ResultSet during the first iteration (i stays 0), so
// the outer loop index never advances in step with the rows.
String sqlselect=new String("select k_id from keywordsTable");
ResultSet rss = stmt1.executeQuery(sqlselect);
kId=new String[numOfFields];
String f="";    // single column definition, e.g. "w3 varchar(20)"
String ff="";   // comma-separated column list (built but unused below)
String ffff="";  // comma-separated column list used in the INSERT statements
for (int i=0;i<kId.length;i++) {
while(rss.next())
{
kId[i]=rss.getString(1);
f="w"+kId[i]+ " varchar(20)";
ff+="w"+kId[i]+", ";
ffff+="w"+kId[i]+", ";
//fff+="w"+kId[i]+ " varchar(20), ";
// NOTE(review): DDL built by string concatenation; safe only because k_id comes
// from our own table, but parameterization is impossible for identifiers anyway.
String sqlalter=new String("ALTER TABLE "+ tableName +" add "+f+"");
//System.out.println(sqlalter);
stmt1.executeUpdate(sqlalter);
}
}
// strip the trailing ", " left by the accumulation loops
ff = ff.replaceAll(", $","");
ffff = ffff.replaceAll(", $","");
String sqlselectF=new String("select f_id from filesTable");
ResultSet rssF = stmt1.executeQuery(sqlselectF);
FId=new String[numOfFiles];
String matInsert = null;
String g="";
String seperator = "";
// NOTE(review): same while-inside-for pattern as above — rssF is fully consumed
// while s is still 0, which is why every row is inserted with the same id.
for (int s=0;s<FId.length;s++) {
while(rssF.next())
{
FId[s]=rssF.getString(1);
g=seperator+FId[s];
for (int k=0;k<di.mat.length;k++) { //row
String m="";
String sep = "";
for (int j=0;j<di.mat[k].length;j++) { //col
m+= (sep+di.mat[k][j]);
sep = " ,";
// NOTE(review): rebuilding matInsert on every column iteration is wasted work;
// only the value after the last column is ever executed.
matInsert=new String("INSERT INTO "+ tableName +"(id,"+ffff+")"+"values" +"("+g+","+m+")");
} //col
System.out.println("ff="+g);
System.out.println(matInsert);
stmt1.executeUpdate(matInsert);
} //row
} //while
} //for 1
// NOTE(review): this assignment runs AFTER all loops finish, so seperator is ""
// for every row built above — presumably it was meant to sit inside the loop.
seperator=" ,";

You shouldn't insert IDs, configure the column as AUTO_INCREMENT and let MySQL assign the IDs upon each INSERT.
Second, the nesting of while loop inside a for loop is probably a logical mistake.
Third, ffff is not declared anywhere in the code that you posted.
Fourth, the INSERT query is missing whitespace in a few places; it will come out (if all the input is correct) as something like:
INSERT INTO tableName(id,ffff)values(x,y)

Related

Android sqlite - how to calculate the difference between 2 REAL entries in sqlite?

In Android/Java, I am trying to compute and store the difference between 2 values in sqlite when one value is entered.
My table looks like this:
When weights are added/stored in the table in the column 'Weight', the column 'Diff_Weight' shall receive "Weight(N) - Weight(N-1)". Example: the last cell of Diff_Weight (row 6) = 88.0 - 55.2 = 32.8. // Row 5 shall get '-0.7' etc. Their type is REAL (col Weight & col Diff_Weight).
This 32.8 should be calculated and added at the same time when 88.0 is added to the table.
So far, I have read lots of tutorials and can't figure how to proceed. (My code to create and insert in the DB is fine, but reading is somehow more complex).
My code to read the entry is very bad because I don't see how to set it up:
public Bouble getData() {
String selectQuery= "SELECT * FROM " + TABLE_NAME2 + " ORDER BY COL_4 DESC LIMIT 1";
SQLiteDatabase db = this.getWritableDatabase();
Cursor cursor = db.rawQuery(TABLE_NAME2, null);
result2 = Double.valueOf(cursor.getString(cursor.getColumnIndex("Weight")));
result1 = Double.valueOf(cursor.getString(cursor.getColumnIndex("Weight")-1));
insertdata(result2-result1); //insert in row 6 of Diff_Weight
return
}
Can anybody help there?
If that is unclear, I was needing some help for the sqlite command AND the java to get the difference result.
Simplistically you can get the data by joining to the same table
SELECT a.id, a.weight, b.weight, (b.weight - a.weight) FROM TABLE_NAME2 a
join TABLE_NAME2 b on (b.id = a.id + 1);
One way is to use the lag() window function to get the value of the previous row (As ordered by id; using timestamps would be better but between splitting up the date and time into different columns and not using a date format that can be meaningfully sorted, this is easier.):
SELECT id, weight,
round(coalesce(weight - lag(weight, 1) OVER (ORDER BY id), weight), 1) AS diff_weight
FROM example
ORDER BY id
which gives
id weight diff_weight
---------- ---------- -----------
1 22.0 22.0
2 22.2 0.2
3 55.0 32.8
4 55.9 0.9
5 55.2 -0.7
6 88.0 32.8
You can make a view of this query use that like a normal table if you like. Generating the differences dynamically like this has the advantage that if an existing weight value changes, everything that depends on it doesn't have to be updated.
ok, after a long search, here is a possible result (sqlite + java):
first, you need to query the last row of the table...
...and handle the case if there is no row in your table (blank or new table)
then you must query the 'Weight' value from the known column 'Weight' (Y) and the row with ID you already have (X)
and when you have your value (last_weight), you need to write the difference (weight-last_weight) in the column 'Diff_Weight'.
Here is the code:
// Answer's solution: read the last row's Weight, then insert the new row with
// the difference (weight - lastWeight) in the Diff_Weight column (COL_5).
// NOTE(review): 'weight' and COL_5 are defined by the surrounding class — not shown here.
SQLiteDatabase db = this.getWritableDatabase();
//query the last row of the table
Cursor cursor = db.rawQuery("SELECT ID FROM TABLE_NAME2 ORDER BY ID DESC LIMIT 1", null);
cursor.moveToLast();
int lastID;
//handle the case if there is no row in your table
// NOTE(review): relying on an exception for the empty-table case works, but
// checking cursor.getCount() would be cleaner; also neither cursor is closed.
try {
lastID = cursor.getInt(0);
} catch (Exception e) {
lastID = 0;
}
Double lastWeight = 0.0;
//query the 'Weight' value
if (lastID >= 1) {
Cursor cursor2 = db.rawQuery("SELECT Weight FROM TABLE_NAME2 WHERE ID=" + lastID, null);
if (cursor2.moveToFirst()) { //this is boundary otherwise 'lastWeight' doesn't get the value
lastWeight= cursor2.getDouble(0);
}
} else {
lastWeight = 0.0;
}
//write the difference in the Diff_Weight column (=COL_5)
ContentValues cValues2 = new ContentValues();
//add your data in COL_1 to COL_4 here...
cValues2.put(COL_5, weight - lastWeight);
long id2 = db.insert(TABLE_NAME2, null, cValues2);
... And so you get the red figures in the column Diff_Weight from the table photo in the question.

sql query to check existing records

I have data in following format:
HashMap<PageID, Set<SubscriberIDS>>
What I need to check is how many SubscriberIDS for each of the PageIDs do not exist in a MySQL table already. MySQL table has PageID and SubscriberID columns.
This is what I have so far:
String NEW_SUBSCRIBER_COUNT = "SELECT ? - COUNT(*) as new_subscribers from FB_SUBSCRIPTIONS WHERE PAGEID=? AND SUBSCRIBERID IN (?)";
First parameter being numberOFSubscriberIDs, Second being PageId and Third being SubscriberIds
but this will need to be hit for each pageId. How do I modify it to give me number of new subscribers for each PageID using single query.
Is there any specific need to do it in one query? Because while it can be done, it might actually be more readable to use your original solution and invoke a query for each page id. In any case, what you want can't be done in a single line, so you need to expect to loop at a given point.
// Build one dynamic query covering all page ids:
//   WHERE (PAGEID=? AND SUBSCRIBERID IN (?,?,...)) OR (PAGEID=? AND ...) ...
// Fixes vs. the posted code: StringBuilder.length is a method (length()),
// 'lenght' typo, parameters were bound before the PreparedStatement existed,
// and JDBC parameter indexes are 1-based.
StringBuilder whereClause = new StringBuilder();
Iterator<PageID> it = yourMap.keySet().iterator();
while(it.hasNext()){
    PageID key = it.next();
    Set<SubscriberIDS> value = yourMap.get(key);
    // One '?' placeholder per subscriber id for the 'IN' clause.
    StringBuilder inClause = new StringBuilder();
    for(SubscriberIDS subId : value){
        if(inClause.length() > 0){
            inClause.append(", ");
        }
        inClause.append("?");
        // values are bound in the second pass below, once the statement exists
    }
    // For each page id we append a new 'OR' to our query
    if(whereClause.length() > 0){
        whereClause.append(" OR ");
    }
    whereClause.append("(PAGEID=? AND SUBSCRIBERID IN (").append(inClause.toString()).append("))");
}
String query = "SELECT PAGEID, COUNT(SUBSCRIBERID) AS SUBSCRIBERS FROM FB_SUBSCRIPTIONS WHERE " + whereClause.toString() + " GROUP BY PAGEID";
// Create prepared statement and set parameters; JDBC indexes start at 1.
PreparedStatement preparedStatement = connection.prepareStatement(query);
int paramIndex = 1;
it = yourMap.keySet().iterator();
while(it.hasNext()){
    PageID key = it.next();
    Set<SubscriberIDS> value = yourMap.get(key);
    preparedStatement.setInt(paramIndex++, key.getId());
    for(SubscriberIDS subId : value){
        preparedStatement.setInt(paramIndex++, subId.getId());
    }
}
// Execute query, loop over result and calculate new subscriptions
ResultSet rs = preparedStatement.executeQuery();
while(rs.next()){
    int pageId = rs.getInt("PAGEID");
    // NOTE(review): yourMap is keyed by PageID objects — get(int) only works if
    // PageID equals/hashCode treat the raw id as equal; confirm against the caller.
    int newSubscriptions = yourMap.get(pageId).size() - rs.getInt("SUBSCRIBERS");
    System.out.println(pageId + ", " + newSubscriptions);
}
Given following data in your map:
PAGEID SUBSCRIBERIDS
1 1,3,4,5,9
2 3,4,5,6,8,9
3 2,5,6
And following data in the DB:
PAGEID SUBSCRIBERIDS
1 3,4,10,11
2 1,2,5,7
3 1,2,5,6,7,8,9
This should give following output:
1,3
2,6
3,0
I haven't actually ran the code, so it might need some adjustments, but it gives you the general idea...

Use variable for column name in sql query

I'm searching for a word (variable) that occurs in multiple columns of a table. The search has to return the name of the columns in which the word in found.
I could use a foreach loop to go through each column separately and know the presence of the word in that particular column when the returned cursor isn't null.
The problem is on how to use two different variables (one for column name and one for the word) in SQL query or rawQuery.
My code is as follows:
// Question's code: for each word, probe each column for an exact match.
String[] columnsList = {"col1","col2","col3"};
String[] wordsList = {"fireman","camper","builder"};
for(String i : wordsList){
for(String j : columnsList){
// Column names cannot be bound as '?' parameters, so j is concatenated;
// the word itself is bound safely as a parameter.
Cursor wordQuery = myDatabaseHandle.rawQuery("Select * from myTableOne WHERE " + j + " = ?",new String[]{i});
// NOTE(review): this is the question's bug — rawQuery never returns null,
// so this condition is always true; check wordQuery.getCount() > 0 instead.
// The cursor is also never closed.
if(!(wordQuery==null)){
Toast.makeText(this,j+"is success",Toast.LENGTH_SHORT).show();
}
}
}
But I'm unable to get the answer. I used a separate string as:
String queryString = "Select * from myTableOne WHERE " + j ;
and in the query,
Cursor WordQuery = myDatabaseHandle.rawQuery(queryString+" = ?",new String[]{i});
But, it's just toasting the names of all columns.
Your error is here:
if(!(wordQuery==null)){
The cursor is never null.
It can contain 0 records, though.
You can check its length by using wordQuery.getCount()
Something like:
if((wordQuery.getCount() > 0)){

Is there a way to parse out column names instead of defining them?

Using Java/Selenium/Excel sheets I have an automation script. When verifying information in the database, I am doing something like this:
//Get values from Excel. Excel user will specify what table and what user
String table=currentTestSuiteXLS.getCellData(currentTestCaseName, "table",currentTestDataSetID);
String user=currentTestSuiteXLS.getCellData(currentTestCaseName, "user",currentTestDataSetID);
//Run query
PreparedStatement pstmt1 = conn.prepareStatement("SELECT * FROM " + table +" WHERE User = '" + user + "' ORDER BY 1 DESC LIMIT 1;");
if(table.equals("A")){
rs1.next();
//Get results from A table
String db_TableAColumn1=rs1.getString("TableAColumn1");
String db_TableAColumn2=rs1.getString("TableAColumn2");
String db_TableAColumn3=rs1.getString("TableAColumn3");
//Get values from excel
String excel_TableAColumn1=currentTestSuiteXLS.getCellData(currentTestCaseName, "TableAColumn1",currentTestDataSetID);
String excel_TableAColumn2=currentTestSuiteXLS.getCellData(currentTestCaseName, "TableAColumn2",currentTestDataSetID);
String excel_TableAColumn3=currentTestSuiteXLS.getCellData(currentTestCaseName, "TableAColumn3",currentTestDataSetID);
if(db_TableAColumn1.equals(excel_TableAColumnA)) { ...
if(db_TableAColumn2.equals(excel_TableAColumn2)) { ...
if(db_TableAColumn3.equals(excel_TableAColumn3)) { ...
if(table.equals("B")){
rs1.next();
//Get results from B table
String db_TableBColumn1=rs1.getString("TableBColumn1");
String db_TableBColumn2=rs1.getString("TableBColumn2");
String db_TableBColumn3=rs1.getString("TableBColumn3");
//Get values from excel
String excel_TableBColumn1=currentTestSuiteXLS.getCellData(currentTestCaseName, "TableBColumn1",currentTestDataSetID);
String excel_TableBColumn2=currentTestSuiteXLS.getCellData(currentTestCaseName, "TableBColumn2",currentTestDataSetID);
String excel_TableBColumn3=currentTestSuiteXLS.getCellData(currentTestCaseName, "TableBColumn3",currentTestDataSetID);
if(db_TableBColumn1.equals(excel_TableBColumn1)) { ...
if(db_TableBColumn2.equals(excel_TableBColumn2)) { ...
if(db_TableBColumn3.equals(excel_TableBColumn3)) { ...
So this is currently working fine. However, it is not very scalable.
If we want to start to check a new column on the A table (or if a new column is added) we need to update the java code. We only want to modify the excel sheet. Is there a way to parse all the columns from the result set, and if a column is in the excel sheet then we check to see if it exists in the result set, if so, then check to see if the expected values match up?
Is there a way to do For each column in the table
String GiveAName= rs1.getString(1); until all columns are given a name
Then if each column in specified in excel has a value, match up.
Instead of defining everything like:
String db_TableAColumn1=rs1.getString("TableAColumn1");
or
String db_TableAColumn1= rs1.getString(1);
Thanks.
Use ResultSetMetaData http://docs.oracle.com/javase/7/docs/api/java/sql/ResultSetMetaData.html,
It will allow you to dynamically retrieve column names of a table.
example:
// Walk the result set and print every column's name and value for each row,
// discovering the columns at runtime via the result set's metadata.
ResultSetMetaData metaData = rs.getMetaData();
int columnCount = metaData.getColumnCount();
while (rs.next()) {
    // JDBC column indexes are 1-based
    for (int col = 1; col <= columnCount; col++) {
        System.out.println("Column name: " + metaData.getColumnName(col) + " Col Value: " + rs.getObject(col));
    }
}

Need Better Algorithm to Scrub SQL Server Table with Java

I need to scrub an SQL Server table on a regular basis, but my solution is taking ridiculously long (about 12 minutes for 73,000 records).
My table has 4 fields:
id1
id2
val1
val2
For every group of records with the same "id1", I need to keep the first (lowest id2) and last (highest id2) and delete everything in between UNLESS val1 or val2 has changed from the previous (next lowest "id2") record.
If you're following me so far, what would a more efficient algorithm be? Here is my java code:
// Question's "before" code: walk STATUS_DATA ordered by (id1, id2) and delete each
// unchanged middle row one at a time.
// NOTE(review): the 12-minute runtime comes from issuing one DELETE round-trip per
// doomed row; the bulk-XML version below batches them. The last* variables are
// presumably initialized before this snippet — not shown here.
boolean bDEL=false;
qps = conn.prepareStatement("SELECT id1, id2, val1, val2 from STATUS_DATA ORDER BY id1, id2");
qrs = qps.executeQuery();
//KEEP FIRST & LAST, DISCARD EVERYTHING ELSE *EXCEPT* WHERE CHANGE IN val1 or val2
while (qrs.next()) {
thisID1 = qrs.getInt("id1");
thisID2 = qrs.getInt("id2");
thisVAL1= qrs.getInt("val1");
thisVAL2= qrs.getDouble("val2");
if (thisID1==lastID1) {
if (bDEL) { //Ensures this is not the last record
// NOTE(review): DELETE built by concatenation and re-prepared every time;
// a single prepared statement with ? parameters would be cheaper and safer.
qps2 = conn2.prepareStatement("DELETE FROM STATUS_DATA where id1="+lastID1+" and id2="+lastID2);
qps2.executeUpdate();
qps2.close();
bDEL = false;
}
// mark the previous row deletable only if nothing changed since it
if (thisVAL1==lastVAL1 && thisVAL2==lastVAL2) {
bDEL = true;
}
} else if (bDEL) bDEL=false;
lastID1 = thisID1;
lastID2 = thisID2;
lastVAL1= thisVAL1;
lastVAL2= thisVAL2;
}
UPDATE 4/20/2015 # 11:10 AM
OK so here is my final solution - for every record, the Java code enters an XML record into a string which is written to file every 10,000 records and then java calls a stored procedure on SQL Server and passes the file name to read. The stored procedure can only use the file name as a variable if dynamic SQL is used to execute the openrowset. I will play around with the interval of procedure execution but so far my performance results are as follows:
BEFORE (1 record delete at a time):
73,000 records processed, 101 records per second
AFTER (bulk XML import):
1.4 Million records processed, 5800 records per second
JAVA SNIPPET:
// Author's final solution: accumulate (id1,id2) pairs of rows to delete as XML,
// flush every 10,000 records to a file, and let the SCRUB_DATA stored procedure
// bulk-delete them server-side (101 rows/s -> ~5800 rows/s per the author).
// NOTE(review): the snippet mixes sDir / sdir / SHMdirdata for the output
// directory — presumably the same path; confirm against the full class.
String ts, sXML = "<DataRecords>\n";
boolean bDEL=false;
qps = conn.prepareStatement("SELECT id1, id2, val1, val2 from STATUS_DATA ORDER BY id1, id2");
qrs = qps.executeQuery();
//KEEP FIRST & LAST, DISCARD EVERYTHING ELSE *EXCEPT* WHERE CHANGE IN val1 or val2
while (qrs.next()) {
thisID1 = qrs.getInt("id1");
thisID2 = qrs.getInt("id2");
thisVAL1= qrs.getInt("val1");
thisVAL2= qrs.getDouble("val2");
if (bDEL && thisID1==lastID1) { //Ensures this is not the first or last record
sXML += "<nxtrec id1=\""+lastID1+"\" id2=\""+lastID2+"\"/>\n";
if ((i + 1) % 10000 == 0) { //Execute every 10000 records
sXML += "</DataRecords>\n"; //Close off Parent Tag
ts = String.valueOf((new java.util.Date()).getTime()); //Each XML File Uniquely Named
writeFile(sDir, "ds"+ts+".xml", sXML); //Write XML to file
conn2=dataSource.getConnection();
cs = conn2.prepareCall("EXEC SCRUB_DATA ?");
cs.setString(1, sdir + "ds"+ts+".xml");
cs.executeUpdate(); //Execute Stored Procedure
cs.close(); conn2.close();
deleteFile(SHMdirdata, "ds"+ts+".xml"); //Delete File
sXML = "<DataRecords>\n";
}
bDEL = false;
}
// flag the current row's predecessor for deletion when values are unchanged
if (thisID1==lastID1 && thisVAL1==lastVAL1 && thisVAL2==lastVAL2) {
bDEL = true;
} else if (bDEL) bDEL=false;
// NOTE(review): the brace/else on the next line does not balance with the ifs
// above as transcribed — an enclosing "if (thisID1==lastID1) {" appears to have
// been lost when the code was posted; verify against the original source.
} else if (bDEL) bDEL=false;
lastID1 = thisID1;
lastID2 = thisID2;
lastVAL1= thisVAL1;
lastVAL2= thisVAL2;
i++;
}
qrs.close(); qps.close(); conn.close();
// flush the final partial batch
sXML += "</DataRecords>\n";
ts = String.valueOf((new java.util.Date()).getTime());
writeFile(sdir, "ds"+ts+".xml", sXML);
conn2=dataSource.getConnection();
cs = conn2.prepareCall("EXEC SCRUB_DATA ?");
cs.setString(1, sdir + "ds"+ts+".xml");
cs.executeUpdate();
cs.close(); conn2.close();
deleteFile(SHMdirdata, "ds"+ts+".xml");
XML FILE OUTPUT:
<DataRecords>
<nxtrec id1="100" id2="1112"/>
<nxtrec id1="100" id2="1113"/>
<nxtrec id1="100" id2="1117"/>
<nxtrec id1="102" id2="1114"/>
...
<nxtrec id1="838" id2="1112"/>
</DataRecords>
SQL SERVER STORED PROCEDURE:
-- Bulk-delete helper: shreds an XML file of (id1, id2) pairs into a temp table,
-- then removes the matching rows from STATUS_DATA with a single set-based DELETE.
-- Restored from the posted snippet: T-SQL variables and XML attribute accessors
-- use the '@' prefix (the page rendering had mangled them to '#'), and the
-- CREATE ... AS framing was missing. #TEMP_TABLE correctly keeps its '#' (temp table).
CREATE PROCEDURE [dbo].[SCRUB_DATA]
    @floc varchar(100) -- File Location (dir + filename) as only parameter
AS
BEGIN
    SET NOCOUNT ON;

    DECLARE @sql as varchar(max);

    -- OPENROWSET(BULK ...) requires a literal file name, so the whole batch must
    -- be built as dynamic SQL and run with EXEC.
    -- NOTE(review): @floc is concatenated into the SQL text — callers must never
    -- pass untrusted input here (injection risk).
    SET @sql = '
    DECLARE @XmlFile XML

    SELECT @XmlFile = BulkColumn
    FROM OPENROWSET(BULK ''' + @floc + ''', SINGLE_BLOB) x;

    CREATE TABLE #TEMP_TABLE (id1 INT, id2 INT);

    -- shred <nxtrec id1=".." id2=".."/> attributes into rows
    INSERT INTO #TEMP_TABLE (id1, id2)
    SELECT
        id1 = DataTab.value(''@id1'', ''int''),
        id2 = DataTab.value(''@id2'', ''int'')
    FROM
        @XmlFile.nodes(''/DataRecords/nxtrec'') AS XTbl(DataTab);

    -- single set-based delete of every listed pair
    delete from D
    from STATUS_DATA D
    inner join #TEMP_TABLE T on ( (T.id1 = D.id1) and (T.id2 = D.id2) );
    ';

    EXEC (@sql);
END
It is almost for certain that your performance issues are not in your algorithm, but rather in the implementation. Say for example your cleanup step has to remove 10,000 records, this means you will have 10000 round trips to your database server.
Instead of doing that, write each of the id pairs to be deleted to an XML file, and send that XML file to SQL server stored proc that shreds the XML into a corresponding temp or temp_var table. Then use a single delete from (or equivalent) to delete all 10K rows.
If you don't know how to shred xml in TSQL, it is well worth the time to learn. Take a look at a simple example to get you started, or just check out a couple of search results for "tsql shred xml" to get going.
ADDED
Pulling 10K records to the client should take < 1 second. Your Java code likewise. If you don't have the time to learn to use XML as suggested, you could write a quick and dirty stored proc that accepts 10 (20, 50?) pairs of ids and deletes the corresponding records from within the stored proc. I use the XML approach regularly to "batch" stuff from the client. If your batches are "large", you might take a look at using the BULK INSERT command on SQL Server -- but the XML is easy and a bit more flexible as it can contain nested data structures. E.g., master/detail relationships.
ADDED
I just did this locally
-- Minimal local demo: delete rows from a real table by joining to a temp table
-- of ids, proving the join-delete pattern works within one session.
create table #tmp
(
id int not null
primary key(id)
)
GO
insert #tmp (id)
select 4
union
select 5
GO
-- now has two rows #tmp
-- T-SQL join-delete: the first FROM names the delete target (alias L),
-- the second FROM supplies the join.
delete from L
from TaskList L
inner join #tmp T on (T.id = L.taskID)
(2 row(s) affected)
-- and they are no longer in TaskList
i.e., this should not be a problem unless you are doing it wrong somehow. Are you creating the temp table and then attempting to use it in different databases connections/sessions. If the sessions are different, the temp table won't be seen in the 2nd session.
Hard to think of another way for this to be wrong off the top of my head.
Have you considered doing something that pushes more of the calculating to SQL instead of java?
This is ugly and doesn't take into account your "value changing" part, but it could be a lot faster:
(This deletes everything except the highest and lowest id2 for each id1)
-- Keep only the lowest and highest id2 per id1 group; delete everything between.
-- (Deliberately ignores the "val1/val2 changed" exception, as the answer states.)
-- Number each row within its id1 group, ordered by id2.
select * into #temp
FROM (SELECT ROW_NUMBER() OVER (PARTITION BY id1 ORDER BY id2) AS 'RowNo',
* from myTable)x
-- Fixed vs. the posted code: T-SQL join-delete must name the target alias
-- ("DELETE i FROM myTable i ..."); "DELETE FROM myTable i LEFT OUTER JOIN"
-- does not parse.
delete i
from myTable i
left outer join
-- z = rows that are neither first (RowNo != 1) nor last (no matching max rowNo)
(select t.* from #temp t
left outer join (select id1, max(rowNo) rowNo from #temp group by id1) x
on x.id1 = t.id1 and x.rowNo = t.RowNo
where t.RowNo != 1 and x.rowNo is null)z
on z.id2 = i.id2 and z.id1 = i.id1
where z.id1 is not null -- only delete rows that matched a middle row
Never underestimate the power of SQL =)
Although I understand this seems more 'straightforward' to implement in a row-by-row fashion, doing it 'set-based' will make it fly.
Some code to create test-data:
-- Generate ~100k rows of random test data for the scrub algorithm.
-- Restored from the posted snippet: T-SQL scalar variables use the '@' prefix
-- (the page rendering had mangled them to '#', which denotes temp tables and
-- does not parse as a variable). The composite primary key is also declared as
-- a proper table-level constraint instead of being attached to one column.
SET NOCOUNT ON

IF OBJECT_ID('mySTATUS_DATA') IS NOT NULL DROP TABLE mySTATUS_DATA
GO
CREATE TABLE mySTATUS_DATA (id1 int NOT NULL,
id2 int NOT NULL,
val1 varchar(100) NOT NULL,
val2 varchar(100) NOT NULL,
PRIMARY KEY (id1, id2))
GO
DECLARE @counter int,
@id1 int,
@id2 int,
@val1 varchar(100),
@val2 varchar(100)

SELECT @counter = 100000,
@id1 = 1,
@id2 = 1,
@val1 = 'abc',
@val2 = '123456'

BEGIN TRANSACTION
WHILE @counter > 0
BEGIN
INSERT mySTATUS_DATA (id1, id2, val1, val2)
VALUES (@id1, @id2, @val1, @val2)

SELECT @counter = @counter - 1
SELECT @id2 = @id2 + 1
-- ~20% chance to start a new id1 group; ~10% chance each to change val1/val2
-- (SELECT @var = ... WHERE <cond> leaves the variable untouched when false)
SELECT @id1 = @id1 + 1, @id2 = 1 WHERE Rand() > 0.8
SELECT @val1 = SubString(convert(varchar(100), NewID()), 0, 9) WHERE Rand() > 0.90
SELECT @val2 = SubString(convert(varchar(100), NewID()), 0, 9) WHERE Rand() > 0.90

-- commit in batches of 1000 to keep the transaction log small
if @counter % 1000 = 0
BEGIN
COMMIT TRANSACTION
BEGIN TRANSACTION
END
END
COMMIT TRANSACTION

SELECT top 1000 * FROM mySTATUS_DATA
SELECT COUNT(*) FROM mySTATUS_DATA
And here is the code to do the actual scrubbing. Mind that the why column is there merely for educational purposes. If you're going to put this in production I'd advise putting it into comments as it only slows down the operations. Also, you could combine the checks on val1 and val2 in 1 single update... in fact, with a bit of effort you probably can combine everything into 1 single DELETE statement. However, I very much doubt it would make things much faster... but it surely would make things a lot less readable.
Anyway, when I run this on my laptop for 100k records it takes only 5 seconds, so I doubt performance is going to be an issue.
-- Set-based scrub of STATUS_DATA: copy rows into #working with a per-id1
-- sequence number, flag the rows to KEEP (first, last, or value-changed),
-- then delete everything unflagged in one statement.
-- Fixed vs. the posted code: the "keep the last record" UPDATE targeted the
-- table name while referencing the alias 'upd' — all UPDATEs now consistently
-- target the alias; the fourth UPDATE's comment said val1 but checks val2.
IF OBJECT_ID('tempdb..#working') IS NOT NULL DROP TABLE #working
GO
-- create copy of table (WHERE 1 = 2 copies only the structure, no rows)
SELECT id1, id2, id2_seqnr = ROW_NUMBER() OVER (PARTITION BY id1 ORDER BY id2),
val1, val2,
keep_this_record = Convert(bit, 0),
why = Convert(varchar(500), NULL)
INTO #working
FROM STATUS_DATA
WHERE 1 = 2
-- load records
INSERT #working (id1, id2, id2_seqnr, val1, val2, keep_this_record, why)
SELECT id1, id2, id2_seqnr = ROW_NUMBER() OVER (PARTITION BY id1 ORDER BY id2),
val1, val2,
keep_this_record = Convert(bit, 0),
why = ''
FROM STATUS_DATA
-- index to make the seqnr self-joins below fast
CREATE UNIQUE CLUSTERED INDEX uq0 ON #working (id1, id2_seqnr)
-- make sure we keep the first record of each id1
UPDATE upd
SET keep_this_record = 1,
why = upd.why + 'first id2 for id1 = ' + Convert(varchar, id1) + ','
FROM #working upd
WHERE id2_seqnr = 1 -- first in sequence
-- make sure we keep the last record of each id1
UPDATE upd
SET keep_this_record = 1,
why = upd.why + 'last id2 for id1 = ' + Convert(varchar, upd.id1) + ','
FROM #working upd
JOIN (SELECT id1, max_seqnr = MAX(id2_seqnr)
FROM #working
GROUP BY id1) mx
ON upd.id1 = mx.id1
AND upd.id2_seqnr = mx.max_seqnr
-- check if val1 has changed versus the previous record
UPDATE upd
SET keep_this_record = 1,
why = upd.why + 'val1 for ' + Convert(varchar, upd.id1) + '/' + Convert(varchar, upd.id2) + ' differs from val1 for ' + Convert(varchar, prev.id1) + '/' + Convert(varchar, prev.id2) + ','
FROM #working upd
JOIN #working prev
ON prev.id1 = upd.id1
AND prev.id2_seqnr = upd.id2_seqnr - 1
AND prev.val1 <> upd.val1
-- check if val2 has changed versus the previous record
UPDATE upd
SET keep_this_record = 1,
why = upd.why + 'val2 for ' + Convert(varchar, upd.id1) + '/' + Convert(varchar, upd.id2) + ' differs from val2 for ' + Convert(varchar, prev.id1) + '/' + Convert(varchar, prev.id2) + ','
FROM #working upd
JOIN #working prev
ON prev.id1 = upd.id1
AND prev.id2_seqnr = upd.id2_seqnr - 1
AND prev.val2 <> upd.val2
-- delete those records we do not want to keep
DELETE del
FROM STATUS_DATA del
JOIN #working w
ON w.id1 = del.id1
AND w.id2 = del.id2
AND w.keep_this_record = 0
-- some info (the 'why' column is purely diagnostic; drop it in production)
SELECT TOP 500 * FROM #working ORDER BY id1, id2
SELECT TOP 500 * FROM STATUS_DATA ORDER BY id1, id2

Categories

Resources