[ADD] util/update_table_from_dict

Pirols · Pirols · commit 20f04c9486e2 · 2025-07-29T17:27:51.000+02:00
A recurrent challenge in writing upgrade scripts is that of updating values in a
table based on some form of already available mapping from the id (or another
identifier) to the new value. This is often addressed with an iterative solution
of the form:

```python
for key, value in mapping.items():
    cr.execute(
        """
        UPDATE table
           SET col = %s
         WHERE key_col = %s
        """,
        [value, key],
    )
```

or in a more efficient (only issuing a single query) but hacky way:

```python
cr.execute(
    """
    UPDATE table
       SET col = (%s::jsonb)->>(key_col::text)
     WHERE key_col = ANY(%s)
    """,
    [json.dumps(mapping), list(mapping)],
)
```

The former is inefficient for big mappings, while the latter often requires
some comments at review time to get it right.
This commit introduces a util meant to make it easier to efficiently perform
such updates.
diff --git a/src/base/tests/test_util.py b/src/base/tests/test_util.py
@@ -881,6 +881,115 @@ def test_parallel_execute_retry_on_serialization_failure(self):
         cr.execute(util.format_query(cr, "SELECT 1 FROM {}", TEST_TABLE_NAME))
         self.assertFalse(cr.rowcount)
 
+    def test_update_one_col_from_dict(self):  # single-column form: `columns` is a plain string
+        TEST_TABLE_NAME = "_upgrade_update_one_col_from_dict_test_table"
+        N_ROWS = 10
+
+        cr = self._get_cr()
+
+        cr.execute(
+            util.format_query(
+                cr,
+                """
+                DROP TABLE IF EXISTS {table};
+
+                CREATE TABLE {table} (
+                    id SERIAL PRIMARY KEY,
+                    col1 INTEGER,
+                    col2 INTEGER
+                );
+
+                INSERT INTO {table} (col1, col2) SELECT v, v FROM GENERATE_SERIES(1, %s) as v;
+                """
+                % N_ROWS,  # fills the GENERATE_SERIES bound; {table} is left for format_query
+                table=TEST_TABLE_NAME,
+            )
+        )
+        mapping = {row_id: row_id * 2 for row_id in range(1, N_ROWS + 1, 2)}  # odd ids only
+        util.update_table_from_dict(cr, TEST_TABLE_NAME, "col1", mapping)
+
+        cr.execute(
+            util.format_query(
+                cr,
+                "SELECT id FROM {table} WHERE col2 IS DISTINCT FROM id",  # NULL-safe, unlike `!=`
+                table=TEST_TABLE_NAME,
+            )
+        )
+        self.assertFalse(cr.rowcount)  # otherwise an unintended column (col2) is affected
+
+        cr.execute(
+            util.format_query(
+                cr,
+                "SELECT id FROM {table} WHERE col1 IS DISTINCT FROM id AND MOD(id, 2) = 0",
+                table=TEST_TABLE_NAME,
+            )
+        )
+        self.assertFalse(cr.rowcount)  # otherwise unintended rows (even ids, not in mapping) are affected
+
+        cr.execute(
+            util.format_query(
+                cr,
+                "SELECT id FROM {table} WHERE col1 IS DISTINCT FROM 2 * id AND MOD(id, 2) = 1",
+                table=TEST_TABLE_NAME,
+            )
+        )
+        self.assertFalse(cr.rowcount)  # otherwise partial/incorrect (including NULL) updates are performed
+
+    def test_update_multiple_cols_from_dict(self):  # multi-column form: `columns` is a list
+        TEST_TABLE_NAME = "_upgrade_update_multiple_cols_from_dict_test_table"
+        N_ROWS = 10
+
+        cr = self._get_cr()
+
+        cr.execute(
+            util.format_query(
+                cr,
+                """
+                DROP TABLE IF EXISTS {table};
+
+                CREATE TABLE {table} (
+                    id SERIAL PRIMARY KEY,
+                    col1 INTEGER,
+                    col2 INTEGER,
+                    col3 INTEGER
+                );
+
+                INSERT INTO {table} (col1, col2, col3) SELECT v, v, v FROM GENERATE_SERIES(1, %s) as v;
+                """
+                % N_ROWS,  # fills the GENERATE_SERIES bound; {table} is left for format_query
+                table=TEST_TABLE_NAME,
+            )
+        )
+        mapping = {row_id: [row_id * 2, row_id * 3] for row_id in range(1, N_ROWS + 1, 2)}  # odd ids only
+        util.update_table_from_dict(cr, TEST_TABLE_NAME, ["col1", "col2"], mapping)
+
+        cr.execute(
+            util.format_query(
+                cr,
+                "SELECT id FROM {table} WHERE col3 IS DISTINCT FROM id",  # NULL-safe, unlike `!=`
+                table=TEST_TABLE_NAME,
+            )
+        )
+        self.assertFalse(cr.rowcount)  # otherwise an unintended column (col3) is affected
+
+        cr.execute(
+            util.format_query(
+                cr,
+                "SELECT id FROM {table} WHERE (col1 IS DISTINCT FROM id OR col2 IS DISTINCT FROM id) AND MOD(id, 2) = 0",
+                table=TEST_TABLE_NAME,
+            )
+        )
+        self.assertFalse(cr.rowcount)  # otherwise unintended rows (even ids, not in mapping) are affected
+
+        cr.execute(
+            util.format_query(
+                cr,
+                "SELECT id FROM {table} WHERE (col1 IS DISTINCT FROM 2 * id OR col2 IS DISTINCT FROM 3 * id) AND MOD(id, 2) = 1",
+                table=TEST_TABLE_NAME,
+            )
+        )
+        self.assertFalse(cr.rowcount)  # otherwise partial/incorrect (including NULL) updates are performed
+
     def test_create_column_with_fk(self):
         cr = self.env.cr
         self.assertFalse(util.column_exists(cr, "res_partner", "_test_lang_id"))
diff --git a/src/util/pg.py b/src/util/pg.py
@@ -2,6 +2,7 @@
 """Utility functions for interacting with PostgreSQL."""
 
 import collections
+import json
 import logging
 import os
 import re
@@ -43,7 +44,7 @@
 
 from .exceptions import MigrationError, SleepyDeveloperError
 from .helpers import _validate_table, model_of_table
-from .misc import Sentinel, log_progress, version_gte
+from .misc import Sentinel, chunks, log_progress, version_gte
 
 _logger = logging.getLogger(__name__)
 
@@ -1621,3 +1622,100 @@ def create_id_sequence(cr, table, set_as_default=True):
                 table=table_sql,
             )
         )
+
+
+def update_table_from_dict(cr, table, columns, mapping, key_col="id", bucket_size=DEFAULT_BUCKET_SIZE):
+    """
+    Update table based on mapping.
+
+    Each `mapping` entry defines the new values for the specified `columns` for the row(s) whose `key_col` value matches the key.
+
+    .. important::
+
+       `columns` can be either a string or a list of strings. Crucially the values of the provided `mapping` must have the same
+       **type** and, if a list, the same **dimensionality** and **order**. Otherwise:
+
+       .. code-block:: python
+
+          columns = ["int_col", "text_col"]
+          mapping = {
+              1: [111, "foo", True],    # third value is ignored
+              2: [222],                 # text_col is set to NULL
+              3: ["bar", 333],          # will attempt to set `int_col` to "bar" and `text_col` to 333
+              4: 444,                   # text_col is set to NULL
+          }
+
+    .. example::
+
+       .. code-block:: python
+
+          # single column update
+          util.update_table_from_dict(
+              cr,
+              "account_move",
+              "always_tax_eligible",
+              {
+                  1: True,
+                  2: False,
+              },
+          )
+
+          # multi-column update
+          util.update_table_from_dict(
+              cr,
+              "account_move",
+              ["closing_return_id", "always_tax_eligible"],
+              {
+                  1: [2, True],
+                  2: [3, False],
+              },
+          )
+
+    .. warning::
+
+       As a side effect, the cursor may be committed.
+
+    :param str table: name of the database table to update
+    :param str | list[str] columns: table's columns to update
+    :param dict[any, any | list[any]] mapping: matches values of `key_col` to the new `columns` values
+    :param str key_col: column to match against keys of `mapping`
+    :param int bucket_size: maximum number of rows to update per single query
+    """
+    _validate_table(table)
+    if not columns or not mapping:
+        return  # nothing to update
+
+    if isinstance(columns, str):  # accept a single column name as well as a list
+        columns = [columns]
+
+    query = format_query(
+        cr,
+        """
+        UPDATE {table} t
+           SET ({cols}) = ROW({cols_values})
+          FROM JSONB_EACH(%s) m
+         WHERE t.{key_col}::varchar = m.key
+        """,
+        table=table,
+        cols=ColumnList.from_unquoted(cr, columns),
+        cols_values=SQLStr(
+            ", ".join(
+                "(m.value->>{:d})::{}".format(  # i-th JSON element, cast to what column_type() reports
+                    col_idx, sql.Identifier(column_type(cr, table, col_name)).as_string(cr._cnx)
+                )
+                for col_idx, col_name in enumerate(columns)
+            )
+        ),
+        key_col=key_col,
+    )
+
+    if len(mapping) <= 1.1 * bucket_size:  # up to 10% over one bucket still goes in a single query
+        cr.execute(query, [json.dumps(mapping)])
+    else:
+        parallel_execute(
+            cr,
+            [
+                cr.mogrify(query, [json.dumps(mapping_chunk)]).decode()  # one fully-bound query string per chunk
+                for mapping_chunk in chunks(mapping.items(), bucket_size, fmt=dict)
+            ],
+        )