mirror of
https://github.com/launchbadge/sqlx.git
synced 2025-10-02 15:25:32 +00:00
fix(postgres): patch nullable inference in Postgres using EXPLAIN
BREAKING CHANGE: some columns in `query!()` et. al. output will change from `T` to `Option<T>` breakage should be minimal in practice as these columns will need to have been manually overridden anyway to avoid runtime errors Signed-off-by: Austin Bonander <austin@launchbadge.com>
This commit is contained in:
parent
6b036055e5
commit
89305873b0
@ -21,7 +21,7 @@ migrate = [ "sha2", "crc" ]
|
|||||||
|
|
||||||
# databases
|
# databases
|
||||||
all-databases = [ "postgres", "mysql", "sqlite", "mssql", "any" ]
|
all-databases = [ "postgres", "mysql", "sqlite", "mssql", "any" ]
|
||||||
postgres = [ "md-5", "sha2", "base64", "sha-1", "rand", "hmac", "futures-channel/sink", "futures-util/sink" ]
|
postgres = [ "md-5", "sha2", "base64", "sha-1", "rand", "hmac", "futures-channel/sink", "futures-util/sink", "json" ]
|
||||||
mysql = [ "sha-1", "sha2", "generic-array", "num-bigint", "base64", "digest", "rand", "rsa" ]
|
mysql = [ "sha-1", "sha2", "generic-array", "num-bigint", "base64", "digest", "rand", "rsa" ]
|
||||||
sqlite = [ "libsqlite3-sys" ]
|
sqlite = [ "libsqlite3-sys" ]
|
||||||
mssql = [ "uuid", "encoding_rs", "regex" ]
|
mssql = [ "uuid", "encoding_rs", "regex" ]
|
||||||
|
@ -1,11 +1,13 @@
|
|||||||
use crate::error::Error;
|
use crate::error::Error;
|
||||||
use crate::ext::ustr::UStr;
|
use crate::ext::ustr::UStr;
|
||||||
use crate::postgres::message::{ParameterDescription, RowDescription};
|
use crate::postgres::message::{ParameterDescription, RowDescription};
|
||||||
|
use crate::postgres::statement::PgStatementMetadata;
|
||||||
use crate::postgres::type_info::{PgCustomType, PgType, PgTypeKind};
|
use crate::postgres::type_info::{PgCustomType, PgType, PgTypeKind};
|
||||||
use crate::postgres::{PgArguments, PgColumn, PgConnection, PgTypeInfo};
|
use crate::postgres::{PgArguments, PgColumn, PgConnection, PgTypeInfo};
|
||||||
use crate::query_as::query_as;
|
use crate::query_as::query_as;
|
||||||
use crate::query_scalar::{query_scalar, query_scalar_with};
|
use crate::query_scalar::{query_scalar, query_scalar_with};
|
||||||
use crate::HashMap;
|
use crate::HashMap;
|
||||||
|
use crate::types::Json;
|
||||||
use futures_core::future::BoxFuture;
|
use futures_core::future::BoxFuture;
|
||||||
use std::fmt::Write;
|
use std::fmt::Write;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
@ -247,22 +249,23 @@ SELECT oid FROM pg_catalog.pg_type WHERE typname ILIKE $1
|
|||||||
|
|
||||||
pub(crate) async fn get_nullable_for_columns(
|
pub(crate) async fn get_nullable_for_columns(
|
||||||
&mut self,
|
&mut self,
|
||||||
columns: &[PgColumn],
|
stmt_id: u32,
|
||||||
|
meta: &PgStatementMetadata,
|
||||||
) -> Result<Vec<Option<bool>>, Error> {
|
) -> Result<Vec<Option<bool>>, Error> {
|
||||||
if columns.is_empty() {
|
if meta.columns.is_empty() {
|
||||||
return Ok(vec![]);
|
return Ok(vec![]);
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut query = String::from("SELECT NOT pg_attribute.attnotnull FROM (VALUES ");
|
let mut nullable_query = String::from("SELECT NOT pg_attribute.attnotnull FROM (VALUES ");
|
||||||
let mut args = PgArguments::default();
|
let mut args = PgArguments::default();
|
||||||
|
|
||||||
for (i, (column, bind)) in columns.iter().zip((1..).step_by(3)).enumerate() {
|
for (i, (column, bind)) in meta.columns.iter().zip((1..).step_by(3)).enumerate() {
|
||||||
if !args.buffer.is_empty() {
|
if !args.buffer.is_empty() {
|
||||||
query += ", ";
|
nullable_query += ", ";
|
||||||
}
|
}
|
||||||
|
|
||||||
let _ = write!(
|
let _ = write!(
|
||||||
query,
|
nullable_query,
|
||||||
"(${}::int4, ${}::int4, ${}::int2)",
|
"(${}::int4, ${}::int4, ${}::int2)",
|
||||||
bind,
|
bind,
|
||||||
bind + 1,
|
bind + 1,
|
||||||
@ -274,7 +277,7 @@ SELECT oid FROM pg_catalog.pg_type WHERE typname ILIKE $1
|
|||||||
args.add(column.relation_attribute_no);
|
args.add(column.relation_attribute_no);
|
||||||
}
|
}
|
||||||
|
|
||||||
query.push_str(
|
nullable_query.push_str(
|
||||||
") as col(idx, table_id, col_idx) \
|
") as col(idx, table_id, col_idx) \
|
||||||
LEFT JOIN pg_catalog.pg_attribute \
|
LEFT JOIN pg_catalog.pg_attribute \
|
||||||
ON table_id IS NOT NULL \
|
ON table_id IS NOT NULL \
|
||||||
@ -283,8 +286,104 @@ SELECT oid FROM pg_catalog.pg_type WHERE typname ILIKE $1
|
|||||||
ORDER BY col.idx",
|
ORDER BY col.idx",
|
||||||
);
|
);
|
||||||
|
|
||||||
query_scalar_with::<_, Option<bool>, _>(&query, args)
|
let mut nullables = query_scalar_with::<_, Option<bool>, _>(&nullable_query, args)
|
||||||
.fetch_all(self)
|
.fetch_all(&mut *self)
|
||||||
.await
|
.await?;
|
||||||
|
|
||||||
|
// patch up our null inference with data from EXPLAIN
|
||||||
|
let nullable_patch = self
|
||||||
|
.nullables_from_explain(stmt_id, meta.parameters.len())
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
for (nullable, patch) in nullables.iter_mut().zip(nullable_patch) {
|
||||||
|
*nullable = patch.or(*nullable);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(nullables)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Infer nullability for columns of this statement using EXPLAIN VERBOSE.
|
||||||
|
///
|
||||||
|
/// This currently only marks columns that are on the inner half of an outer join
|
||||||
|
/// and returns `None` for all others.
|
||||||
|
async fn nullables_from_explain(
|
||||||
|
&mut self,
|
||||||
|
stmt_id: u32,
|
||||||
|
params_len: usize,
|
||||||
|
) -> Result<Vec<Option<bool>>, Error> {
|
||||||
|
let mut explain = format!("EXPLAIN (VERBOSE, FORMAT JSON) EXECUTE sqlx_s_{}", stmt_id);
|
||||||
|
let mut comma = false;
|
||||||
|
|
||||||
|
if params_len > 0 {
|
||||||
|
explain += "(";
|
||||||
|
|
||||||
|
// fill the arguments list with NULL, which should theoretically be valid
|
||||||
|
for _ in 0..params_len {
|
||||||
|
if comma {
|
||||||
|
explain += ", ";
|
||||||
|
}
|
||||||
|
|
||||||
|
explain += "NULL";
|
||||||
|
comma = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
explain += ")";
|
||||||
|
}
|
||||||
|
|
||||||
|
let (Json([explain]),): (Json<[Explain; 1]>,) = query_as(&explain).fetch_one(self).await?;
|
||||||
|
|
||||||
|
let mut nullables = Vec::new();
|
||||||
|
|
||||||
|
if let Some(outputs) = &explain.plan.output {
|
||||||
|
nullables.resize(outputs.len(), None);
|
||||||
|
visit_plan(&explain.plan, outputs, &mut nullables);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(nullables)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn visit_plan(plan: &Plan, outputs: &[String], nullables: &mut Vec<Option<bool>>) {
|
||||||
|
if let Some(plan_outputs) = &plan.output {
|
||||||
|
// all outputs of a Full Join must be marked nullable
|
||||||
|
// otherwise, all outputs of the inner half of an outer join must be marked nullable
|
||||||
|
if let Some("Full") | Some("Inner") = plan
|
||||||
|
.join_type
|
||||||
|
.as_deref()
|
||||||
|
.or(plan.parent_relation.as_deref())
|
||||||
|
{
|
||||||
|
for output in plan_outputs {
|
||||||
|
if let Some(i) = outputs.iter().position(|o| o == output) {
|
||||||
|
// N.B. this may produce false positives but those don't cause runtime errors
|
||||||
|
nullables[i] = Some(true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(plans) = &plan.plans {
|
||||||
|
if let Some("Left") | Some("Right") = plan.join_type.as_deref() {
|
||||||
|
for plan in plans {
|
||||||
|
visit_plan(plan, outputs, nullables);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(serde::Deserialize)]
|
||||||
|
struct Explain {
|
||||||
|
#[serde(rename = "Plan")]
|
||||||
|
plan: Plan,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(serde::Deserialize)]
|
||||||
|
struct Plan {
|
||||||
|
#[serde(rename = "Join Type")]
|
||||||
|
join_type: Option<String>,
|
||||||
|
#[serde(rename = "Parent Relationship")]
|
||||||
|
parent_relation: Option<String>,
|
||||||
|
#[serde(rename = "Output")]
|
||||||
|
output: Option<Vec<String>>,
|
||||||
|
#[serde(rename = "Plans")]
|
||||||
|
plans: Option<Vec<Plan>>,
|
||||||
|
}
|
||||||
|
@ -415,9 +415,9 @@ impl<'c> Executor<'c> for &'c mut PgConnection {
|
|||||||
Box::pin(async move {
|
Box::pin(async move {
|
||||||
self.wait_until_ready().await?;
|
self.wait_until_ready().await?;
|
||||||
|
|
||||||
let (_, metadata) = self.get_or_prepare(sql, &[], true, None).await?;
|
let (stmt_id, metadata) = self.get_or_prepare(sql, &[], true, None).await?;
|
||||||
|
|
||||||
let nullable = self.get_nullable_for_columns(&metadata.columns).await?;
|
let nullable = self.get_nullable_for_columns(stmt_id, &metadata).await?;
|
||||||
|
|
||||||
Ok(Describe {
|
Ok(Describe {
|
||||||
columns: metadata.columns.clone(),
|
columns: metadata.columns.clone(),
|
||||||
|
@ -30,6 +30,7 @@ impl PgBufMutExt for Vec<u8> {
|
|||||||
// writes a statement name by ID
|
// writes a statement name by ID
|
||||||
#[inline]
|
#[inline]
|
||||||
fn put_statement_name(&mut self, id: u32) {
|
fn put_statement_name(&mut self, id: u32) {
|
||||||
|
// N.B. if you change this don't forget to update it in ../describe.rs
|
||||||
self.extend(b"sqlx_s_");
|
self.extend(b"sqlx_s_");
|
||||||
|
|
||||||
itoa::write(&mut *self, id).unwrap();
|
itoa::write(&mut *self, id).unwrap();
|
||||||
|
@ -178,15 +178,12 @@
|
|||||||
/// ##### Force Nullable
|
/// ##### Force Nullable
|
||||||
/// Selecting a column `foo as "foo?"` (Postgres / SQLite) or `` foo as `foo?` `` (MySQL) overrides
|
/// Selecting a column `foo as "foo?"` (Postgres / SQLite) or `` foo as `foo?` `` (MySQL) overrides
|
||||||
/// inferred nullability and forces the column to be treated as nullable; this is provided mainly
|
/// inferred nullability and forces the column to be treated as nullable; this is provided mainly
|
||||||
/// for symmetry with `!`, but also because nullability inference currently has some holes and false
|
/// for symmetry with `!`.
|
||||||
/// negatives that may not be completely fixable without doing our own complex analysis on the given
|
|
||||||
/// query.
|
|
||||||
///
|
///
|
||||||
/// ```rust,ignore
|
/// ```rust,ignore
|
||||||
/// # async fn main() {
|
/// # async fn main() {
|
||||||
/// # let mut conn = panic!();
|
/// # let mut conn = panic!();
|
||||||
/// // Postgres:
|
/// // Postgres/SQLite:
|
||||||
/// // Note that this query wouldn't work in SQLite as we still don't know the exact type of `id`
|
|
||||||
/// let record = sqlx::query!(r#"select 1 as "id?""#) // MySQL: use "select 1 as `id?`" instead
|
/// let record = sqlx::query!(r#"select 1 as "id?""#) // MySQL: use "select 1 as `id?`" instead
|
||||||
/// .fetch_one(&mut conn)
|
/// .fetch_one(&mut conn)
|
||||||
/// .await?;
|
/// .await?;
|
||||||
@ -197,30 +194,45 @@
|
|||||||
/// # }
|
/// # }
|
||||||
/// ```
|
/// ```
|
||||||
///
|
///
|
||||||
/// One current such hole is exposed by left-joins involving `NOT NULL` columns in Postgres and
|
/// MySQL should be accurate with regards to nullability as it directly tells us when a column is
|
||||||
/// SQLite; as we only know nullability for a given column based on the `NOT NULL` constraint
|
/// expected to never be `NULL`. Any mistakes should be considered a bug in MySQL.
|
||||||
/// of its original column in a table, if that column is then brought in via a `LEFT JOIN`
|
///
|
||||||
/// we have no good way to know and so continue assuming it may not be null which may result
|
/// However, inference in SQLite and Postgres is more fragile as it depends primarily on observing
|
||||||
/// in some `UnexpectedNull` errors at runtime.
|
/// `NOT NULL` constraints on columns. If a `NOT NULL` column is brought in by a `LEFT JOIN` then
|
||||||
|
/// that column may be `NULL` if its row does not satisfy the join condition. Similarly, a
|
||||||
|
/// `FULL JOIN` or `RIGHT JOIN` may generate rows from the primary table that are all `NULL`.
|
||||||
|
///
|
||||||
|
/// Unfortunately, the result of mistakes in inference is a `UnexpectedNull` error at runtime.
|
||||||
|
///
|
||||||
|
/// In Postgres, we patch up this inference by analyzing `EXPLAIN VERBOSE` output (which is not
|
||||||
|
/// well documented, is highly dependent on the query plan that Postgres generates, and may differ
|
||||||
|
/// between releases) to find columns that are the result of left/right/full outer joins. This
|
||||||
|
/// analysis errs on the side of producing false positives (marking columns nullable that are not
|
||||||
|
/// in practice) but there are likely edge cases that it does not cover yet.
|
||||||
///
|
///
|
||||||
/// Using `?` as an override we can fix this for columns we know to be nullable in practice:
|
/// Using `?` as an override we can fix this for columns we know to be nullable in practice:
|
||||||
///
|
///
|
||||||
/// ```rust,ignore
|
/// ```rust,ignore
|
||||||
/// # async fn main() {
|
/// # async fn main() {
|
||||||
/// # let mut conn = panic!();
|
/// # let mut conn = panic!();
|
||||||
/// // Ironically this is the exact column we look at to determine nullability in Postgres
|
/// // Ironically this is the exact column we primarily look at to determine nullability in Postgres
|
||||||
/// let record = sqlx::query!(
|
/// let record = sqlx::query!(
|
||||||
/// r#"select attnotnull as "attnotnull?" from (values (1)) ids left join pg_attribute on false"#
|
/// r#"select attnotnull as "attnotnull?" from (values (1)) ids left join pg_attribute on false"#
|
||||||
/// )
|
/// )
|
||||||
/// .fetch_one(&mut conn)
|
/// .fetch_one(&mut conn)
|
||||||
/// .await?;
|
/// .await?;
|
||||||
///
|
///
|
||||||
/// // For Postgres this would have been inferred to be `bool` and we would have gotten an error
|
/// // Although we do our best, under Postgres this might have been inferred to be `bool`
|
||||||
|
/// // In that case, we would have gotten an error
|
||||||
/// assert_eq!(record.attnotnull, None);
|
/// assert_eq!(record.attnotnull, None);
|
||||||
/// # }
|
/// # }
|
||||||
/// ```
|
/// ```
|
||||||
///
|
///
|
||||||
/// See [launchbadge/sqlx#367](https://github.com/launchbadge/sqlx/issues/367) for more details on this issue.
|
/// If you find that you need to use this override, please open an issue with a query we can use
|
||||||
|
/// to reproduce the problem. For Postgres users, especially helpful would be the output of
|
||||||
|
/// `EXPLAIN (VERBOSE, FORMAT JSON) <your query>` with bind parameters substituted in the query
|
||||||
|
/// (as the exact value of bind parameters can change the query plan)
|
||||||
|
/// and the definitions of any relevant tables (or sufficiently anonymized equivalents).
|
||||||
///
|
///
|
||||||
/// ##### Force a Different/Custom Type
|
/// ##### Force a Different/Custom Type
|
||||||
/// Selecting a column `foo as "foo: T"` (Postgres / SQLite) or `` foo as `foo: T` `` (MySQL)
|
/// Selecting a column `foo as "foo: T"` (Postgres / SQLite) or `` foo as `foo: T` `` (MySQL)
|
||||||
|
@ -799,3 +799,76 @@ async fn test_issue_622() -> anyhow::Result<()> {
|
|||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[sqlx_macros::test]
|
||||||
|
async fn test_describe_outer_join_nullable() -> anyhow::Result<()> {
|
||||||
|
let mut conn = new::<Postgres>().await?;
|
||||||
|
|
||||||
|
// test nullability inference for various joins
|
||||||
|
|
||||||
|
// inner join, nullability should not be overridden
|
||||||
|
// language=PostgreSQL
|
||||||
|
let describe = conn
|
||||||
|
.describe(
|
||||||
|
"select tweet.id
|
||||||
|
from (values (null)) vals(val)
|
||||||
|
inner join tweet on false",
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
assert_eq!(describe.nullable(0), Some(false));
|
||||||
|
|
||||||
|
// language=PostgreSQL
|
||||||
|
let describe = conn
|
||||||
|
.describe(
|
||||||
|
"select tweet.id
|
||||||
|
from (values (null)) vals(val)
|
||||||
|
left join tweet on false",
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
// tweet.id is marked NOT NULL but it's brought in from a left-join here
|
||||||
|
// which should make it nullable
|
||||||
|
assert_eq!(describe.nullable(0), Some(true));
|
||||||
|
|
||||||
|
// make sure we don't mis-infer for the outer half of the join
|
||||||
|
// language=PostgreSQL
|
||||||
|
let describe = conn
|
||||||
|
.describe(
|
||||||
|
"select tweet1.id, tweet2.id
|
||||||
|
from tweet tweet1
|
||||||
|
left join tweet tweet2 on false",
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
assert_eq!(describe.nullable(0), Some(false));
|
||||||
|
assert_eq!(describe.nullable(1), Some(true));
|
||||||
|
|
||||||
|
// right join, nullability should be inverted
|
||||||
|
// language=PostgreSQL
|
||||||
|
let describe = conn
|
||||||
|
.describe(
|
||||||
|
"select tweet1.id, tweet2.id
|
||||||
|
from tweet tweet1
|
||||||
|
right join tweet tweet2 on false",
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
assert_eq!(describe.nullable(0), Some(true));
|
||||||
|
assert_eq!(describe.nullable(1), Some(false));
|
||||||
|
|
||||||
|
// full outer join, both tables are nullable
|
||||||
|
// language=PostgreSQL
|
||||||
|
let describe = conn
|
||||||
|
.describe(
|
||||||
|
"select tweet1.id, tweet2.id
|
||||||
|
from tweet tweet1
|
||||||
|
full join tweet tweet2 on false",
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
assert_eq!(describe.nullable(0), Some(true));
|
||||||
|
assert_eq!(describe.nullable(1), Some(true));
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user