Fugue and PyArrow Types

We can use Fugue SQL to generate a table that maps Fugue type expressions to PyArrow types.

import triad
from fugue_sql import FugueSQLWorkflow
from typing import List, Any

#schema: fugue_type_expr:str, pa_type:str
def type_to_expr(primary: bool = False) -> List[List[Any]]:
    """Return ``[fugue_type_expr, pa_type]`` rows from triad's type mappings.

    With ``primary`` false, every entry of ``_TYPE_EXPRESSION_MAPPING`` yields
    a row made of its key and the string form of its value. With ``primary``
    true, the rows come from ``_TYPE_EXPRESSION_R_MAPPING`` instead, taking the
    value as the first column and the string form of the key as the second.
    """
    pa_utils = triad.utils.pyarrow
    if primary:
        reverse_items = pa_utils._TYPE_EXPRESSION_R_MAPPING.items()
        return [[expr, str(pa_type)] for pa_type, expr in reverse_items]
    forward_items = pa_utils._TYPE_EXPRESSION_MAPPING.items()
    return [[expr, str(pa_type)] for expr, pa_type in forward_items]
    
# Run one Fugue SQL script that builds two tables with type_to_expr (all
# expressions, and the primary=true variant), left-joins them on
# fugue_type_expr, and prints up to 100 rows of the result.
# is_primary is "YES" when a matching row exists in f2p_primary and "."
# otherwise; rows are ordered by pa_type, then is_primary descending.
with FugueSQLWorkflow() as dag:
    dag("""
    f2p = CREATE USING type_to_expr
    f2p_primary = CREATE USING type_to_expr(primary=true)
    SELECT CASE WHEN f2p_primary.pa_type IS NOT NULL THEN "YES" ELSE "." END AS is_primary,f2p.*
        FROM f2p LEFT OUTER JOIN f2p_primary 
        ON f2p.fugue_type_expr=f2p_primary.fugue_type_expr
        ORDER BY pa_type, is_primary DESC
    
    PRINT 100 ROWS
    """)
PandasDataFrame
is_primary:str|fugue_type_expr:str|pa_type:str                                                      
--------------+-------------------+-----------------------------------------------------------------
YES           |bytes              |binary                                                           
.             |binary             |binary                                                           
YES           |bool               |bool                                                             
.             |boolean            |bool                                                             
YES           |date               |date32[day]                                                      
YES           |double             |double                                                           
.             |float64            |double                                                           
YES           |float              |float                                                            
.             |float32            |float                                                            
YES           |float16            |halffloat                                                        
YES           |short              |int16                                                            
.             |int16              |int16                                                            
YES           |int                |int32                                                            
.             |int32              |int32                                                            
YES           |long               |int64                                                            
.             |int64              |int64                                                            
YES           |byte               |int8                                                             
.             |int8               |int8                                                             
YES           |null               |null                                                             
YES           |str                |string                                                           
.             |string             |string                                                           
YES           |datetime           |timestamp[us]                                                    
YES           |ushort             |uint16                                                           
.             |uint16             |uint16                                                           
YES           |uint               |uint32                                                           
.             |uint32             |uint32                                                           
YES           |ulong              |uint64                                                           
.             |uint64             |uint64                                                           
YES           |ubyte              |uint8                                                            
.             |uint8              |uint8                                                            
Total count: 30