Fugue and PyArrow Types
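We can use Fugue SQL to generate a table that maps Fugue type expressions to PyArrow types. In the output, `is_primary` is `YES` for expressions that appear in triad's reverse (PyArrow -> Fugue) mapping, and `.` for the remaining aliases.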
import triad
from fugue_sql import FugueSQLWorkflow
from typing import List, Any
#schema: fugue_type_expr:str, pa_type:str
def type_to_expr(primary:bool=False) -> List[List[Any]]:
    """Return ``[fugue_type_expr, pa_type]`` rows built from triad's type maps.

    The schema hint comment directly above this function names the two
    output columns; keep it adjacent to the ``def`` line.

    Args:
        primary: When ``False`` (default), rows come from
            ``_TYPE_EXPRESSION_MAPPING`` (keys are used as the expression,
            values are stringified as the PyArrow type). When ``True``, rows
            come from ``_TYPE_EXPRESSION_R_MAPPING`` (keys are stringified as
            the PyArrow type, values are used as the expression).

    Returns:
        A list of two-element lists: the type expression followed by the
        string form of the PyArrow type.
    """
    # NOTE: these are private triad attributes and may change between releases.
    pa_utils = triad.utils.pyarrow
    if primary:
        reverse_map = pa_utils._TYPE_EXPRESSION_R_MAPPING
        return [[expr, str(pa_type)] for pa_type, expr in reverse_map.items()]
    forward_map = pa_utils._TYPE_EXPRESSION_MAPPING
    return [[expr, str(pa_type)] for expr, pa_type in forward_map.items()]
# Fugue SQL script:
#   * f2p          - rows produced by type_to_expr() with its default argument
#   * f2p_primary  - rows produced by type_to_expr(primary=true)
# The LEFT OUTER JOIN keeps every f2p row and flags it "YES" when the same
# expression also exists in f2p_primary, "." otherwise. The result is sorted
# by PyArrow type (primary expression first) and up to 100 rows are printed.
_TYPE_TABLE_SQL = """
f2p = CREATE USING type_to_expr
f2p_primary = CREATE USING type_to_expr(primary=true)
SELECT CASE WHEN f2p_primary.pa_type IS NOT NULL THEN "YES" ELSE "." END AS is_primary,f2p.*
FROM f2p LEFT OUTER JOIN f2p_primary
ON f2p.fugue_type_expr=f2p_primary.fugue_type_expr
ORDER BY pa_type, is_primary DESC
PRINT 100 ROWS
"""

with FugueSQLWorkflow() as dag:
    dag(_TYPE_TABLE_SQL)
PandasDataFrame
is_primary:str|fugue_type_expr:str|pa_type:str
--------------+-------------------+-----------------------------------------------------------------
YES |bytes |binary
. |binary |binary
YES |bool |bool
. |boolean |bool
YES |date |date32[day]
YES |double |double
. |float64 |double
YES |float |float
. |float32 |float
YES |float16 |halffloat
YES |short |int16
. |int16 |int16
YES |int |int32
. |int32 |int32
YES |long |int64
. |int64 |int64
YES |byte |int8
. |int8 |int8
YES |null |null
YES |str |string
. |string |string
YES |datetime |timestamp[us]
YES |ushort |uint16
. |uint16 |uint16
YES |uint |uint32
. |uint32 |uint32
YES |ulong |uint64
. |uint64 |uint64
YES |ubyte |uint8
. |uint8 |uint8
Total count: 30