mirror of
https://github.com/launchbadge/sqlx.git
synced 2025-10-02 15:25:32 +00:00
fix(postgres): patch nullable inference in Postgres using EXPLAIN
BREAKING CHANGE: some columns in `query!()` et. al. output will change from `T` to `Option<T>` breakage should be minimal in practice as these columns will need to have been manually overridden anyway to avoid runtime errors Signed-off-by: Austin Bonander <austin@launchbadge.com>
This commit is contained in:
parent
6b036055e5
commit
89305873b0
@ -21,7 +21,7 @@ migrate = [ "sha2", "crc" ]
|
||||
|
||||
# databases
|
||||
all-databases = [ "postgres", "mysql", "sqlite", "mssql", "any" ]
|
||||
postgres = [ "md-5", "sha2", "base64", "sha-1", "rand", "hmac", "futures-channel/sink", "futures-util/sink" ]
|
||||
postgres = [ "md-5", "sha2", "base64", "sha-1", "rand", "hmac", "futures-channel/sink", "futures-util/sink", "json" ]
|
||||
mysql = [ "sha-1", "sha2", "generic-array", "num-bigint", "base64", "digest", "rand", "rsa" ]
|
||||
sqlite = [ "libsqlite3-sys" ]
|
||||
mssql = [ "uuid", "encoding_rs", "regex" ]
|
||||
|
@ -1,11 +1,13 @@
|
||||
use crate::error::Error;
|
||||
use crate::ext::ustr::UStr;
|
||||
use crate::postgres::message::{ParameterDescription, RowDescription};
|
||||
use crate::postgres::statement::PgStatementMetadata;
|
||||
use crate::postgres::type_info::{PgCustomType, PgType, PgTypeKind};
|
||||
use crate::postgres::{PgArguments, PgColumn, PgConnection, PgTypeInfo};
|
||||
use crate::query_as::query_as;
|
||||
use crate::query_scalar::{query_scalar, query_scalar_with};
|
||||
use crate::HashMap;
|
||||
use crate::types::Json;
|
||||
use futures_core::future::BoxFuture;
|
||||
use std::fmt::Write;
|
||||
use std::sync::Arc;
|
||||
@ -247,22 +249,23 @@ SELECT oid FROM pg_catalog.pg_type WHERE typname ILIKE $1
|
||||
|
||||
pub(crate) async fn get_nullable_for_columns(
|
||||
&mut self,
|
||||
columns: &[PgColumn],
|
||||
stmt_id: u32,
|
||||
meta: &PgStatementMetadata,
|
||||
) -> Result<Vec<Option<bool>>, Error> {
|
||||
if columns.is_empty() {
|
||||
if meta.columns.is_empty() {
|
||||
return Ok(vec![]);
|
||||
}
|
||||
|
||||
let mut query = String::from("SELECT NOT pg_attribute.attnotnull FROM (VALUES ");
|
||||
let mut nullable_query = String::from("SELECT NOT pg_attribute.attnotnull FROM (VALUES ");
|
||||
let mut args = PgArguments::default();
|
||||
|
||||
for (i, (column, bind)) in columns.iter().zip((1..).step_by(3)).enumerate() {
|
||||
for (i, (column, bind)) in meta.columns.iter().zip((1..).step_by(3)).enumerate() {
|
||||
if !args.buffer.is_empty() {
|
||||
query += ", ";
|
||||
nullable_query += ", ";
|
||||
}
|
||||
|
||||
let _ = write!(
|
||||
query,
|
||||
nullable_query,
|
||||
"(${}::int4, ${}::int4, ${}::int2)",
|
||||
bind,
|
||||
bind + 1,
|
||||
@ -274,7 +277,7 @@ SELECT oid FROM pg_catalog.pg_type WHERE typname ILIKE $1
|
||||
args.add(column.relation_attribute_no);
|
||||
}
|
||||
|
||||
query.push_str(
|
||||
nullable_query.push_str(
|
||||
") as col(idx, table_id, col_idx) \
|
||||
LEFT JOIN pg_catalog.pg_attribute \
|
||||
ON table_id IS NOT NULL \
|
||||
@ -283,8 +286,104 @@ SELECT oid FROM pg_catalog.pg_type WHERE typname ILIKE $1
|
||||
ORDER BY col.idx",
|
||||
);
|
||||
|
||||
query_scalar_with::<_, Option<bool>, _>(&query, args)
|
||||
.fetch_all(self)
|
||||
.await
|
||||
let mut nullables = query_scalar_with::<_, Option<bool>, _>(&nullable_query, args)
|
||||
.fetch_all(&mut *self)
|
||||
.await?;
|
||||
|
||||
// patch up our null inference with data from EXPLAIN
|
||||
let nullable_patch = self
|
||||
.nullables_from_explain(stmt_id, meta.parameters.len())
|
||||
.await?;
|
||||
|
||||
for (nullable, patch) in nullables.iter_mut().zip(nullable_patch) {
|
||||
*nullable = patch.or(*nullable);
|
||||
}
|
||||
|
||||
Ok(nullables)
|
||||
}
|
||||
|
||||
/// Infer nullability for columns of this statement using EXPLAIN VERBOSE.
|
||||
///
|
||||
/// This currently only marks columns that are on the inner half of an outer join
|
||||
/// and returns `None` for all others.
|
||||
async fn nullables_from_explain(
|
||||
&mut self,
|
||||
stmt_id: u32,
|
||||
params_len: usize,
|
||||
) -> Result<Vec<Option<bool>>, Error> {
|
||||
let mut explain = format!("EXPLAIN (VERBOSE, FORMAT JSON) EXECUTE sqlx_s_{}", stmt_id);
|
||||
let mut comma = false;
|
||||
|
||||
if params_len > 0 {
|
||||
explain += "(";
|
||||
|
||||
// fill the arguments list with NULL, which should theoretically be valid
|
||||
for _ in 0..params_len {
|
||||
if comma {
|
||||
explain += ", ";
|
||||
}
|
||||
|
||||
explain += "NULL";
|
||||
comma = true;
|
||||
}
|
||||
|
||||
explain += ")";
|
||||
}
|
||||
|
||||
let (Json([explain]),): (Json<[Explain; 1]>,) = query_as(&explain).fetch_one(self).await?;
|
||||
|
||||
let mut nullables = Vec::new();
|
||||
|
||||
if let Some(outputs) = &explain.plan.output {
|
||||
nullables.resize(outputs.len(), None);
|
||||
visit_plan(&explain.plan, outputs, &mut nullables);
|
||||
}
|
||||
|
||||
Ok(nullables)
|
||||
}
|
||||
}
|
||||
|
||||
fn visit_plan(plan: &Plan, outputs: &[String], nullables: &mut Vec<Option<bool>>) {
|
||||
if let Some(plan_outputs) = &plan.output {
|
||||
// all outputs of a Full Join must be marked nullable
|
||||
// otherwise, all outputs of the inner half of an outer join must be marked nullable
|
||||
if let Some("Full") | Some("Inner") = plan
|
||||
.join_type
|
||||
.as_deref()
|
||||
.or(plan.parent_relation.as_deref())
|
||||
{
|
||||
for output in plan_outputs {
|
||||
if let Some(i) = outputs.iter().position(|o| o == output) {
|
||||
// N.B. this may produce false positives but those don't cause runtime errors
|
||||
nullables[i] = Some(true);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(plans) = &plan.plans {
|
||||
if let Some("Left") | Some("Right") = plan.join_type.as_deref() {
|
||||
for plan in plans {
|
||||
visit_plan(plan, outputs, nullables);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(serde::Deserialize)]
|
||||
struct Explain {
|
||||
#[serde(rename = "Plan")]
|
||||
plan: Plan,
|
||||
}
|
||||
|
||||
#[derive(serde::Deserialize)]
|
||||
struct Plan {
|
||||
#[serde(rename = "Join Type")]
|
||||
join_type: Option<String>,
|
||||
#[serde(rename = "Parent Relationship")]
|
||||
parent_relation: Option<String>,
|
||||
#[serde(rename = "Output")]
|
||||
output: Option<Vec<String>>,
|
||||
#[serde(rename = "Plans")]
|
||||
plans: Option<Vec<Plan>>,
|
||||
}
|
||||
|
@ -415,9 +415,9 @@ impl<'c> Executor<'c> for &'c mut PgConnection {
|
||||
Box::pin(async move {
|
||||
self.wait_until_ready().await?;
|
||||
|
||||
let (_, metadata) = self.get_or_prepare(sql, &[], true, None).await?;
|
||||
let (stmt_id, metadata) = self.get_or_prepare(sql, &[], true, None).await?;
|
||||
|
||||
let nullable = self.get_nullable_for_columns(&metadata.columns).await?;
|
||||
let nullable = self.get_nullable_for_columns(stmt_id, &metadata).await?;
|
||||
|
||||
Ok(Describe {
|
||||
columns: metadata.columns.clone(),
|
||||
|
@ -30,6 +30,7 @@ impl PgBufMutExt for Vec<u8> {
|
||||
// writes a statement name by ID
|
||||
#[inline]
|
||||
fn put_statement_name(&mut self, id: u32) {
|
||||
// N.B. if you change this don't forget to update it in ../describe.rs
|
||||
self.extend(b"sqlx_s_");
|
||||
|
||||
itoa::write(&mut *self, id).unwrap();
|
||||
|
@ -178,15 +178,12 @@
|
||||
/// ##### Force Nullable
|
||||
/// Selecting a column `foo as "foo?"` (Postgres / SQLite) or `` foo as `foo?` `` (MySQL) overrides
|
||||
/// inferred nullability and forces the column to be treated as nullable; this is provided mainly
|
||||
/// for symmetry with `!`, but also because nullability inference currently has some holes and false
|
||||
/// negatives that may not be completely fixable without doing our own complex analysis on the given
|
||||
/// query.
|
||||
/// for symmetry with `!`.
|
||||
///
|
||||
/// ```rust,ignore
|
||||
/// # async fn main() {
|
||||
/// # let mut conn = panic!();
|
||||
/// // Postgres:
|
||||
/// // Note that this query wouldn't work in SQLite as we still don't know the exact type of `id`
|
||||
/// // Postgres/SQLite:
|
||||
/// let record = sqlx::query!(r#"select 1 as "id?""#) // MySQL: use "select 1 as `id?`" instead
|
||||
/// .fetch_one(&mut conn)
|
||||
/// .await?;
|
||||
@ -197,30 +194,45 @@
|
||||
/// # }
|
||||
/// ```
|
||||
///
|
||||
/// One current such hole is exposed by left-joins involving `NOT NULL` columns in Postgres and
|
||||
/// SQLite; as we only know nullability for a given column based on the `NOT NULL` constraint
|
||||
/// of its original column in a table, if that column is then brought in via a `LEFT JOIN`
|
||||
/// we have no good way to know and so continue assuming it may not be null which may result
|
||||
/// in some `UnexpectedNull` errors at runtime.
|
||||
/// MySQL should be accurate with regards to nullability as it directly tells us when a column is
|
||||
/// expected to never be `NULL`. Any mistakes should be considered a bug in MySQL.
|
||||
///
|
||||
/// However, inference in SQLite and Postgres is more fragile as it depends primarily on observing
|
||||
/// `NOT NULL` constraints on columns. If a `NOT NULL` column is brought in by a `LEFT JOIN` then
|
||||
/// that column may be `NULL` if its row does not satisfy the join condition. Similarly, a
|
||||
/// `FULL JOIN` or `RIGHT JOIN` may generate rows from the primary table that are all `NULL`.
|
||||
///
|
||||
/// Unfortunately, the result of mistakes in inference is a `UnexpectedNull` error at runtime.
|
||||
///
|
||||
/// In Postgres, we patch up this inference by analyzing `EXPLAIN VERBOSE` output (which is not
|
||||
/// well documented, is highly dependent on the query plan that Postgres generates, and may differ
|
||||
/// between releases) to find columns that are the result of left/right/full outer joins. This
|
||||
/// analysis errs on the side of producing false positives (marking columns nullable that are not
|
||||
/// in practice) but there are likely edge cases that it does not cover yet.
|
||||
///
|
||||
/// Using `?` as an override we can fix this for columns we know to be nullable in practice:
|
||||
///
|
||||
/// ```rust,ignore
|
||||
/// # async fn main() {
|
||||
/// # let mut conn = panic!();
|
||||
/// // Ironically this is the exact column we look at to determine nullability in Postgres
|
||||
/// // Ironically this is the exact column we primarily look at to determine nullability in Postgres
|
||||
/// let record = sqlx::query!(
|
||||
/// r#"select attnotnull as "attnotnull?" from (values (1)) ids left join pg_attribute on false"#
|
||||
/// )
|
||||
/// .fetch_one(&mut conn)
|
||||
/// .await?;
|
||||
///
|
||||
/// // For Postgres this would have been inferred to be `bool` and we would have gotten an error
|
||||
/// // Although we do our best, under Postgres this might have been inferred to be `bool`
|
||||
/// // In that case, we would have gotten an error
|
||||
/// assert_eq!(record.attnotnull, None);
|
||||
/// # }
|
||||
/// ```
|
||||
///
|
||||
/// See [launchbadge/sqlx#367](https://github.com/launchbadge/sqlx/issues/367) for more details on this issue.
|
||||
/// If you find that you need to use this override, please open an issue with a query we can use
|
||||
/// to reproduce the problem. For Postgres users, especially helpful would be the output of
|
||||
/// `EXPLAIN (VERBOSE, FORMAT JSON) <your query>` with bind parameters substituted in the query
|
||||
/// (as the exact value of bind parameters can change the query plan)
|
||||
/// and the definitions of any relevant tables (or sufficiently anonymized equivalents).
|
||||
///
|
||||
/// ##### Force a Different/Custom Type
|
||||
/// Selecting a column `foo as "foo: T"` (Postgres / SQLite) or `` foo as `foo: T` `` (MySQL)
|
||||
|
@ -799,3 +799,76 @@ async fn test_issue_622() -> anyhow::Result<()> {
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[sqlx_macros::test]
|
||||
async fn test_describe_outer_join_nullable() -> anyhow::Result<()> {
|
||||
let mut conn = new::<Postgres>().await?;
|
||||
|
||||
// test nullability inference for various joins
|
||||
|
||||
// inner join, nullability should not be overridden
|
||||
// language=PostgreSQL
|
||||
let describe = conn
|
||||
.describe(
|
||||
"select tweet.id
|
||||
from (values (null)) vals(val)
|
||||
inner join tweet on false",
|
||||
)
|
||||
.await?;
|
||||
|
||||
assert_eq!(describe.nullable(0), Some(false));
|
||||
|
||||
// language=PostgreSQL
|
||||
let describe = conn
|
||||
.describe(
|
||||
"select tweet.id
|
||||
from (values (null)) vals(val)
|
||||
left join tweet on false",
|
||||
)
|
||||
.await?;
|
||||
|
||||
// tweet.id is marked NOT NULL but it's brought in from a left-join here
|
||||
// which should make it nullable
|
||||
assert_eq!(describe.nullable(0), Some(true));
|
||||
|
||||
// make sure we don't mis-infer for the outer half of the join
|
||||
// language=PostgreSQL
|
||||
let describe = conn
|
||||
.describe(
|
||||
"select tweet1.id, tweet2.id
|
||||
from tweet tweet1
|
||||
left join tweet tweet2 on false",
|
||||
)
|
||||
.await?;
|
||||
|
||||
assert_eq!(describe.nullable(0), Some(false));
|
||||
assert_eq!(describe.nullable(1), Some(true));
|
||||
|
||||
// right join, nullability should be inverted
|
||||
// language=PostgreSQL
|
||||
let describe = conn
|
||||
.describe(
|
||||
"select tweet1.id, tweet2.id
|
||||
from tweet tweet1
|
||||
right join tweet tweet2 on false",
|
||||
)
|
||||
.await?;
|
||||
|
||||
assert_eq!(describe.nullable(0), Some(true));
|
||||
assert_eq!(describe.nullable(1), Some(false));
|
||||
|
||||
// full outer join, both tables are nullable
|
||||
// language=PostgreSQL
|
||||
let describe = conn
|
||||
.describe(
|
||||
"select tweet1.id, tweet2.id
|
||||
from tweet tweet1
|
||||
full join tweet tweet2 on false",
|
||||
)
|
||||
.await?;
|
||||
|
||||
assert_eq!(describe.nullable(0), Some(true));
|
||||
assert_eq!(describe.nullable(1), Some(true));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user