第一步: 生成组合字段重复的临时表source_dup_simple create table source_dup_simple nologging pctfree 1 pctused 99 as 0select field_name1,field_name2,count(0) as num from source_table group by field_name1,field_name2 having count(0)>1.
第二步: 生成组合字段重复的主表里完整记录的临时表source_dup create table source_dup nologging pctfree 1 pctused 99 as 0select t1.* from source_table t1,source_dup_simple t2 where t1.field_name1=t2.field_name1 and t1.field_name2=t2.field_name2.
第三步: 删去source_dup里的重复记录
--可选择:保留rowid小的记录 0delete from source_dup a where rowid > ( 0select min(rowid) from source_dup b where a.field_name1 = b.field_name1 and a.field_name2=b.field_name2). commit.
--可选择:保留rowid大的记录 0delete from source_dup a where rowid < ( 0select max(rowid) from source_dup b where a.field_name1 = b.field_name1 and a.field_name2=b.field_name2). commit.
0delete from source_dup a where date_field < ( 0select max(date_field) from source_dup b where a.field_name1 = b.field_name1 and a.field_name2=b.field_name2). commit.
--可选择:保留时间字段date_field小的记录
0delete from source_dup a where date_field > ( 0select min(date_field) from source_dup b where a.field_name1 = b.field_name1 and a.field_name2=b.field_name2). commit.
如果时间字段上有重复,还需要再次根据rowid来删一次
0delete from source_dup a where rowid < ( 0select max(rowid) from source_dup b where a.field_name1 = b.field_name1 and a.field_name2=b.field_name2). commit.
第四步: 删去所有重复组合字段原始表里记录 0delete from source_table where field_name1||field_name2 in (0select field_name1||field_name2 from source_dup_simple). commit.