Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 10 additions & 6 deletions AppendFeaturesToLayer/processing/algs/AppendFeaturesToLayer.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,17 +223,16 @@ def processAlgorithm(self, parameters, context, feedback):
target_field = target.fields().field(target_idx)

if target_idx in target.primaryKeyAttributes():
# When the PK is a UUID (or string), we can always allow updating it
if target_field.type() not in [QMetaType.QString, QMetaType.QUuid]:
# We won't update PKs in UPDATE mode; that would be dangerous (at least most of the time)!
if action_on_duplicate == self.UPDATE_EXISTING_FEATURE:
continue

# When the PK is a UUID and target layer is a PG layer, we add the PK to the mapping,
# so that we can keep the incoming PK for non-duplicate features.
if target.dataProvider().name() != 'postgres' or target_field.typeName() != "uuid":
# Check that we don't have an automatic PK.
# Note that for non-automatic PKs, PG is giving a nextval(NULL) as default clause (which should be '').
if target.dataProvider().defaultValueClause(target_idx) not in ['', 'nextval(NULL)']:
continue # We won't be able to update automatic PKs, so skip them

# Note: Non-automatic PKs will be treated later, when passing the mapping to update/append.

if target.dataProvider().storageType() == 'GPKG' and target_field.name() == 'fid':
continue # We won't be able to update a GPKG FID, so skip it.

Expand Down Expand Up @@ -308,6 +307,11 @@ def processAlgorithm(self, parameters, context, feedback):
geom.avoidIntersections(QgsProject.instance().avoidIntersectionsLayers())

if target_feature_exists and action_on_duplicate in (self.UPDATE_EXISTING_FEATURE, self.UPDATE_EXISTING_GEOMETRY):
# If target PK is in attrs, we should remove it from attrs,
# since we shouldn't try to update the PK, which could be dangerous.
for target_pk_idx in target.primaryKeyAttributes():
attrs.pop(target_pk_idx, None) # Deletes without KeyErrors

for t_f in target.getFeatures(target_value_dict[duplicate_target_value]):
duplicate_features_set.add(t_f.id())
if action_on_duplicate == self.UPDATE_EXISTING_FEATURE:
Expand Down
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,10 +68,10 @@ This algorithm allows you to choose a field in `source` and `target` layers to c

The algorithm deals with target layer's Primary Keys in this way:

| PRIMARY KEY | APPEND mode | UPDATE mode |
|:-------------------------------:|:------------------------------------------------------------------------------------------------------------------:|:------------------------------------------:|
| Automatic PK<br/>(e.g., serial) | It lets the provider (e.g., PostgreSQL, GeoPackage, etc.) fill the value automatically | It doesn't modify the value already stored |
| Non-automatic PK | You need to provide a value for the PK in the source layer, because such value will be set in the target layer's PK | It doesn't modify the value already stored |
| PRIMARY KEY | APPEND mode | UPDATE mode |
|:-------------------------------:|:------------------------------------------------------------------------------------------------------------------:|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|
| Automatic PK<br/>(e.g., serial) | It lets the provider (e.g., PostgreSQL, GeoPackage, etc.) fill the value automatically | **Duplicate features**: It doesn't modify the value already stored<br/>**Non-duplicate features**: Lets the provider fill the value automatically, **except** for UUID PKs on PostgreSQL, where the value is set from the source layer (if given)! |
| Non-automatic PK | You need to provide a value for the PK in the source layer, because such value will be set in the target layer's PK | **Duplicate features**: It doesn't modify the value already stored<br/>**Non-duplicate features**: The value is set from the source layer (it should always be given)! |

**Note on geometry updates**

Expand Down Expand Up @@ -168,7 +168,7 @@ Make sure the plugin can be found in your QGIS plugins folder, that is, that you
First, you need to set 2 environment variables:

export GITHUB_WORKSPACE=/path/to/AppendFeaturesToLayer/
export QGIS_TEST_VERSION="3.40.15-noble"
export QGIS_TEST_VERSION="3.44.7-noble"

After that, you could run unit tests locally with this command:

Expand Down
257 changes: 254 additions & 3 deletions tests/test_pk.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,37 @@ def test_append_update_pks_gpkg(self):
# The only updated value
self.assertEqual(output_layer.getFeature(1)["descripcion"], 'Los datos deben corresponder a su modelo')

# Finally, let's create a new feature in source and run on UPDATE mode.
# We check here that we don't set the target PK (T_Id) field, even for new (i.e.,
# non-duplicate) features, but let the provider calculate the new PK instead.
f = QgsFeature(input_layer.fields())
f.setAttribute("T_Id", 110)
f.setAttribute("codigo", "R0010")
f.setAttribute("descripcion", "ZYX")
self.assertTrue(input_layer.dataProvider().addFeatures([f]))

res = processing.run("etl_load:appendfeaturestolayer",
{'SOURCE_LAYER': input_layer,
'SOURCE_FIELD': 'codigo',
'TARGET_LAYER': output_layer,
'TARGET_FIELD': 'codigo',
'ACTION_ON_DUPLICATE': 2}) # UPDATE

self.assertEqual(res['TARGET_LAYER'].featureCount(), 4)
self.assertEqual(res[APPENDED_COUNT], 1) # The new source feature which had T_Id=110
self.assertEqual(res[UPDATED_FEATURE_COUNT], 3) # 3 matching features counted as UPDATED
self.assertIsNone(res[SKIPPED_COUNT])

# print([f.name() for f in output_layer.fields()])
# print([f.attributes() for f in output_layer.getFeatures()])

# It's the provider that creates the PK (T_Id),
# so we don't see the 110 from the appended feature!
self.assertEqual([f["T_Id"] for f in output_layer.getFeatures()], [1, 2, 3, 4])

# The only appended value
self.assertEqual(output_layer.getFeature(4)["descripcion"], 'ZYX')

def test_append_update_pks_pg_serial_notnull(self):
print('\nINFO: Validating avoiding to set/update PKs (serial, NOT NULL) in PG...')
source_gpkg = get_test_file_copy_path('source_pk.gpkg') # fid, T_Id, codigo, descripcion
Expand Down Expand Up @@ -149,6 +180,37 @@ def test_append_update_pks_pg_serial_notnull(self):
# The only updated value
self.assertEqual(pg_layer.getFeature(1)["descripcion"], 'Los datos deben corresponder a su modelo')

# Finally, let's create a new feature in source and run on UPDATE mode.
# We check here that we don't set the target PK (T_Id) field, even for new (i.e.,
# non-duplicate) features, but let the provider calculate the new PK instead.
f = QgsFeature(input_layer.fields())
f.setAttribute("T_Id", 110)
f.setAttribute("codigo", "R0010")
f.setAttribute("descripcion", "ZYX")
self.assertTrue(input_layer.dataProvider().addFeatures([f]))

res = processing.run("etl_load:appendfeaturestolayer",
{'SOURCE_LAYER': input_layer,
'SOURCE_FIELD': 'codigo',
'TARGET_LAYER': pg_layer,
'TARGET_FIELD': 'codigo',
'ACTION_ON_DUPLICATE': 2}) # UPDATE

self.assertEqual(res['TARGET_LAYER'].featureCount(), 4)
self.assertEqual(res[APPENDED_COUNT], 1) # The new source feature which had T_Id=110
self.assertEqual(res[UPDATED_FEATURE_COUNT], 3) # 3 matching features counted as UPDATED
self.assertIsNone(res[SKIPPED_COUNT])

# print([f.name() for f in pg_layer.fields()])
# print([f.attributes() for f in pg_layer.getFeatures()])

# It's the provider that creates the PK (T_Id),
# so we don't see the 110 from the appended feature!
self.assertEqual([f["T_Id"] for f in pg_layer.getFeatures()], [1, 2, 3, 4])

# The only appended value
self.assertEqual(pg_layer.getFeature(4)["descripcion"], 'ZYX')

def test_append_update_pks_pg_no_serial_notnull(self):
print('\nINFO: Validating avoiding to set/update PKs (no serial, NOT NULL) in PG...')
source_gpkg = get_test_file_copy_path('source_pk.gpkg') # fid, T_Id, codigo, descripcion
Expand Down Expand Up @@ -188,7 +250,10 @@ def test_append_update_pks_pg_no_serial_notnull(self):

# print([f.name() for f in pg_layer.fields()])
# print([f.attributes() for f in pg_layer.getFeatures()])
self.assertEqual([f["T_Id"] for f in pg_layer.getFeatures()], [1, 100, 101]) # Automatic PKs
self.assertEqual([f["T_Id"] for f in pg_layer.getFeatures()], [1, 100, 101]) # Non-automatic PKs

# Check that our ABC description is still there (will be updated in the next run)
self.assertEqual(pg_layer.getFeature(1)["descripcion"], 'ABC')

res = processing.run("etl_load:appendfeaturestolayer",
{'SOURCE_LAYER': input_layer,
Expand All @@ -204,11 +269,45 @@ def test_append_update_pks_pg_no_serial_notnull(self):

# print([f.name() for f in pg_layer.fields()])
# print([f.attributes() for f in pg_layer.getFeatures()])
self.assertEqual([f["T_Id"] for f in pg_layer.getFeatures()], [1, 100, 101]) # We don't touch the automatic PKs

# # The only updated value
# PKs are not changed, i.e., T_Id 1 remains being 1, in spite of having
# a matching duplicate feature (codigo=R0001) from source with T_id=100
self.assertEqual([f["T_Id"] for f in pg_layer.getFeatures()], [1, 100, 101])

# The only updated value
self.assertEqual(pg_layer.getFeature(1)["descripcion"], 'Los datos deben corresponder a su modelo')

# Finally, let's create a new feature in source and run on UPDATE mode.
# We check here that we don't update the target PK (T_Id) field for duplicate features,
# BUT we do set the target PK from new (i.e., non-duplicate) features.
f = QgsFeature(input_layer.fields())
f.setAttribute("T_Id", 110)
f.setAttribute("codigo", "R0010")
f.setAttribute("descripcion", "ZYX")
self.assertTrue(input_layer.dataProvider().addFeatures([f]))

res = processing.run("etl_load:appendfeaturestolayer",
{'SOURCE_LAYER': input_layer,
'SOURCE_FIELD': 'codigo',
'TARGET_LAYER': pg_layer,
'TARGET_FIELD': 'codigo',
'ACTION_ON_DUPLICATE': 2}) # UPDATE

self.assertEqual(res['TARGET_LAYER'].featureCount(), 4)
self.assertEqual(res[APPENDED_COUNT], 1) # The new source feature which had T_Id=110
self.assertEqual(res[UPDATED_FEATURE_COUNT], 3) # 3 matching features counted as UPDATED
self.assertIsNone(res[SKIPPED_COUNT])

# print([f.name() for f in pg_layer.fields()])
# print([f.attributes() for f in pg_layer.getFeatures()])

# Since this time the target PK is not automatic, for non-duplicate features
# we take it from the source, so we have the 110 from the appended feature!
self.assertEqual([f["T_Id"] for f in pg_layer.getFeatures()], [1, 100, 101, 110])

# The only appended value
self.assertEqual(pg_layer.getFeature(110)["descripcion"], 'ZYX')

def test_append_update_pks_pg_uuid_notnull(self):
print('\nINFO: Validating to set/update PKs (UUID, NOT NULL) in PG...')

Expand Down Expand Up @@ -465,6 +564,158 @@ def test_append_update_pks_pg_uuid_notnull(self):
# T_Id: uuid_7, codigo: R0007, descripcion: STU
# T_Id: uuid_8, codigo: R0008, descripcion: VWX

def test_append_update_pks_pg_uuid_non_auto(self):
    """Check how a non-automatic UUID primary key is handled on a PG target layer.

    The test performs three runs against the 'tipo_regla_uuid_non_auto' table:

    1. A plain append (no duplicate detection): the source feature is appended
       and its UUID is expected to end up in the target.
    2. UPDATE mode with only a duplicate in the source (matched on 'codigo'):
       nothing is appended, the duplicate's description is updated.
    3. UPDATE mode with one duplicate and one new source feature: the new
       feature is appended and its UUID is expected to be taken from the source.
    """
    print('\nINFO: Validating to set/update PKs (UUID, NON-AUTO) in PG...')

    # Create empty input layer
    input_layer = QgsVectorLayer(
        "Point?crs=epsg:3116&field=fid:integer&field=T_Id:string&field=codigo:string&field=descripcion:string",
        "uuid-layer", "memory")
    self.assertTrue(input_layer.isValid())

    # Get target layer with UUID PK
    target_layer = get_qgis_pg_layer(PG_BD_1, 'tipo_regla_uuid_non_auto', truncate=True)  # T_Id, codigo, descripcion
    self.assertTrue(target_layer.isValid())
    self.assertEqual(target_layer.featureCount(), 0)

    QgsProject.instance().addMapLayers([input_layer, target_layer])

    # Create a feature on the target. We need to pass a UUID.
    f = QgsFeature(target_layer.fields())
    uuid_1 = '15431753-059f-4c23-ba60-0e0fc0b28fa5'
    f.setAttribute("T_Id", uuid_1)
    f.setAttribute("codigo", "R0001")
    f.setAttribute("descripcion", "ABC")
    self.assertTrue(target_layer.dataProvider().addFeatures([f]))
    self.assertEqual(target_layer.featureCount(), 1)

    # Status in the target layer:
    # T_Id: uuid_1, codigo: R0001, descripcion: ABC

    # Create one feature on the input with a new UUID.
    f = QgsFeature(input_layer.fields())
    uuid_2 = '12616fa9-f8f8-4746-b5e9-302b387cdb8f'
    f.setAttribute("T_Id", uuid_2)
    f.setAttribute("codigo", "R0002")
    f.setAttribute("descripcion", "GHI")
    self.assertTrue(input_layer.dataProvider().addFeatures([f]))

    # Status in the input layer:
    # T_Id: uuid_2, codigo: R0002, descripcion: GHI
    # Status in the target layer:
    # T_Id: uuid_1, codigo: R0001, descripcion: ABC

    # Expected: the single input feature is appended, keeping the UUID given in the source
    res = processing.run("etl_load:appendfeaturestolayer",
                         {'SOURCE_LAYER': input_layer,
                          'SOURCE_FIELD': None,
                          'TARGET_LAYER': target_layer,
                          'TARGET_FIELD': None,
                          'ACTION_ON_DUPLICATE': 0})  # No action

    self.assertEqual(res['TARGET_LAYER'].featureCount(), 2)
    self.assertEqual(res[APPENDED_COUNT], 1)
    self.assertIsNone(res[UPDATED_FEATURE_COUNT])
    self.assertIsNone(res[SKIPPED_COUNT])

    # Status in the input layer:
    # T_Id: uuid_2, codigo: R0002, descripcion: GHI

    # Status in the target layer:
    # T_Id: uuid_1, codigo: R0001, descripcion: ABC
    # T_Id: uuid_2, codigo: R0002, descripcion: GHI

    # Let's prepare the input for a meaningful update
    # New feature to update R0001
    f = QgsFeature(input_layer.fields())
    f.setAttribute("T_Id", uuid_1)
    f.setAttribute("codigo", "R0001")
    f.setAttribute("descripcion", "DEF")
    self.assertTrue(input_layer.dataProvider().addFeatures([f]))

    # Remove redundant feature
    request = QgsFeatureRequest()
    # Plain string: the expression has no placeholders, so no f-prefix is needed
    request.setFilterExpression('"codigo" = \'R0002\'')
    R0002_feature = list(input_layer.getFeatures(request))[0]
    input_layer.dataProvider().deleteFeatures([R0002_feature.id()])
    self.assertEqual(input_layer.featureCount(), 1)

    # Status in the input layer:
    # T_Id: uuid_1, codigo: R0001, descripcion: DEF

    # Status in the target layer:
    # T_Id: uuid_1, codigo: R0001, descripcion: ABC
    # T_Id: uuid_2, codigo: R0002, descripcion: GHI

    # And we perform an update for the duplicate, which is matched on 'codigo'
    res = processing.run("etl_load:appendfeaturestolayer",
                         {'SOURCE_LAYER': input_layer,
                          'SOURCE_FIELD': 'codigo',
                          'TARGET_LAYER': target_layer,
                          'TARGET_FIELD': 'codigo',
                          'ACTION_ON_DUPLICATE': 2})  # UPDATE

    self.assertEqual(res['TARGET_LAYER'].featureCount(), 2)
    self.assertEqual(res[APPENDED_COUNT], 0)  # No new appended (all already exist)
    self.assertEqual(res[UPDATED_FEATURE_COUNT], 1)
    self.assertIsNone(res[SKIPPED_COUNT])

    uuids_in_target = [f["T_Id"] for f in target_layer.getFeatures()]
    self.assertEqual(set(uuids_in_target), {uuid_1, uuid_2})
    desc_in_target = [f["descripcion"] for f in target_layer.getFeatures()]
    self.assertEqual(set(desc_in_target), {'DEF', 'GHI'})

    # Status in the input layer:
    # T_Id: uuid_1, codigo: R0001, descripcion: DEF

    # Status in the target layer:
    # T_Id: uuid_1, codigo: R0001, descripcion: DEF
    # T_Id: uuid_2, codigo: R0002, descripcion: GHI

    # Now we add a new one with a new UUID, which should be added without problems,
    # i.e., taking the PK from the source for the non-duplicate feature (uuid_3)
    f = QgsFeature(input_layer.fields())
    uuid_3 = 'bae27e96-bffc-4b27-9cdc-162731043293'
    f.setAttribute("T_Id", uuid_3)
    f.setAttribute("codigo", "R0003")
    f.setAttribute("descripcion", "JKL")
    self.assertTrue(input_layer.dataProvider().addFeatures([f]))

    # Status in the input layer:
    # T_Id: uuid_1, codigo: R0001, descripcion: DEF
    # T_Id: uuid_3, codigo: R0003, descripcion: JKL

    # Status in the target layer:
    # T_Id: uuid_1, codigo: R0001, descripcion: DEF
    # T_Id: uuid_2, codigo: R0002, descripcion: GHI

    # Run UPDATE mode again (matching on 'codigo'): R0001 is a duplicate, R0003 is new
    res = processing.run("etl_load:appendfeaturestolayer",
                         {'SOURCE_LAYER': input_layer,
                          'SOURCE_FIELD': 'codigo',
                          'TARGET_LAYER': target_layer,
                          'TARGET_FIELD': 'codigo',
                          'ACTION_ON_DUPLICATE': 2})  # UPDATE

    self.assertEqual(res['TARGET_LAYER'].featureCount(), 3)
    self.assertEqual(res[APPENDED_COUNT], 1)
    self.assertEqual(res[UPDATED_FEATURE_COUNT], 1)  # Only the R0001 duplicate is updated, although without changes
    self.assertIsNone(res[SKIPPED_COUNT])

    uuids_in_target = [f["T_Id"] for f in target_layer.getFeatures()]
    self.assertEqual(set(uuids_in_target), {uuid_1, uuid_2, uuid_3})
    desc_in_target = [f["descripcion"] for f in target_layer.getFeatures()]
    self.assertEqual(set(desc_in_target), {'DEF', 'GHI', 'JKL'})

    # Status in the input layer:
    # T_Id: uuid_1, codigo: R0001, descripcion: DEF
    # T_Id: uuid_3, codigo: R0003, descripcion: JKL

    # Status in the target layer:
    # T_Id: uuid_1, codigo: R0001, descripcion: DEF
    # T_Id: uuid_2, codigo: R0002, descripcion: GHI
    # T_Id: uuid_3, codigo: R0003, descripcion: JKL

@classmethod
def tearDownClass(cls):
print('INFO: Tear down TestTablePK')
Expand Down
Loading
Loading