Storing spatial data in Delta Lake as GEOGRAPHY type
You can store geography data in a Delta Lake table in a GEOGRAPHY column. See also GEOMETRY_vs_GEOGRAPHY.
%sql
create or replace table tmp_geographies as
select
st_point(0, 0, 4326)::geography(4326) as geography,
"Null Island" as name
union all
select
st_transform(st_point(155000, 463000, 28992), 4326)::geography(4326) as geography,
"Onze Lieve Vrouwetoren" as name
union all
select
st_makepolygon(
st_makeline(
array(
st_point(- 80.1935973, 25.7741566, 4326),
st_point(- 64.7563086, 32.3040273, 4326),
st_point(- 66.1166669, 18.4653003, 4326),
st_point(- 80.1935973, 25.7741566, 4326)
)
)
)::geography(4326) as geography,
"Bermuda Triangle" as name;
describe tmp_geographies
-- Returns:
-- col_name data_type comment
-- geography geography(4326) null
-- name string null
Note the data type geography(4326)
above.
Note
Make sure you are using Serverless environment version 4+, or else outputting GEOMETRY/GEOGRAPHY types directly (without e.g. st_asewkt()
) will not work.
%sql
from
tmp_geographies
-- Returns:
-- _sqldf:pyspark.sql.connect.dataframe.DataFrame
-- geography:geography(OGC:CRS84, SPHERICAL)
-- name:string
--
-- name ewkt_geography
-- Bermuda Triangle SRID=4326;POLYGON((-80.1935973 25.7741566,-64.7563086 32.3040273,-66.1166669 18.4653003,-80.1935973 25.7741566))
-- Onze Lieve Vrouwetoren SRID=4326;POINT(5.3872035084137675 52.15517230119224)
-- Null Island SRID=4326;POINT(0 0)
Let’s calculate the distance in kilometers (on the surface of the spheroid Earth) between the two point examples above. st_distancespheroid expects GEOMETRIES of lat/lon, so we’ll need to cast.
%sql
with geoms_lonlat as (
select
geography::geometry(4326) geom_lonlat,
name
from
tmp_geographies
),
nullisland as (
select
*
from
geoms_lonlat
where
name = "Null Island"
),
onzelieve as (
select
*
from
geoms_lonlat
where
name = "Onze Lieve Vrouwetoren"
)
select
st_distancesphere(nullisland.geom_lonlat, onzelieve.geom_lonlat) / 1e3 as distance_kms
from
nullisland,
onzelieve
-- Returns:
-- distance_kms
-- 5821.233209229107
The GEOGRAPHY type also offers further functions that take lon/lat as input and produce meters as output, such as st_area. So let’s calculate the size of the Bermuda Triangle in square kms: