Python x DuckDB
# DuckDB runs the SQL query against the in-scope dataframe `df`;
# .df() converts the query result back into a dataframe.
results = duckdb.sql(
    "SELECT DISTINCT MUNICIPIO_NOME_3 FROM df ORDER BY 1"
).df()
# Run the query directly against the CSV file (no dataframe involved).
# NOTE: `in_file` is interpolated into the SQL text, so it must be a trusted
# path that contains no single quotes.
sql_query = f"""
SELECT
COUNT(*) as row_count
FROM read_csv('{in_file}')
"""
# The connection is a context manager: it is closed when the block exits,
# even if the query raises, so no in-memory connection is left open.
with duckdb.connect() as conn:
    result = conn.execute(sql_query).df()
result.head()
# Run the query directly against the CSV file: list the distinct values of
# MUNICIPIO_NOME_3, sorted by that column.
# NOTE: `in_file` is interpolated into the SQL text, so it must be a trusted
# path that contains no single quotes.
sql_query = f"""
SELECT DISTINCT MUNICIPIO_NOME_3
FROM read_csv('{in_file}')
ORDER BY 1
"""
# Using the connection as a context manager guarantees it is closed even when
# the query fails (an explicit close() after the query would be skipped).
with duckdb.connect() as conn:
    result = conn.execute(sql_query).df()
result.head()
# How to persist data in a DuckDB database file.
# In this example the table is populated straight from the dataframe `df`.
# NOTE: a plain CREATE TABLE raises if `votacao_ce` already exists in the
# database file, so re-running this step requires dropping the table first
# (or switching to CREATE OR REPLACE TABLE).
# The context manager closes the connection even if the statement fails, so
# the database file is not left open.
with duckdb.connect(out_file_votacao_datawarehouse) as con:
    con.sql("CREATE TABLE votacao_ce AS SELECT * FROM df")
# Read the data back from the DuckDB database file.
# The result is materialized into a dataframe inside the `with` block, so it
# stays usable after the connection has been closed on exit.
with duckdb.connect(out_file_votacao_datawarehouse) as con:
    duck_df = con.sql(
        "SELECT DISTINCT MUNICIPIO_NOME_3 FROM votacao_ce ORDER BY 1 LIMIT 10"
    ).df()
duck_df.head()
# Export `results` to CSV without writing the index column.
results.to_csv(out_file_votacao_dep_fed, index=False)

# Write both report dataframes into a single Excel workbook, one sheet each.
# The to_excel calls must be inside the `with` body: the writer saves and
# closes the workbook when the block exits.
with pd.ExcelWriter(out_file_report) as writer:
    votacao_deputado.to_excel(writer, sheet_name="votacao_deputado")
    votacao_cidade.to_excel(writer, sheet_name="votacao_cidade")
Para ler arquivos zipados com DuckDB
Steps to Read Files from ZIP Archives
1. Install and Load the zipfs Extension. To enable reading files inside ZIP archives, install the zipfs community extension and load it:
INSTALL zipfs FROM community;
LOAD zipfs;
2. Query Files Inside a ZIP Archive. Use the zip:// URL scheme, giving the path to the ZIP file followed by the path of the file inside it:
SELECT * FROM 'zip://path/to/archive.zip/filename.csv';
Replace path/to/archive.zip with the path to your ZIP file.
Replace filename.csv with the name of the file inside the archive.
3. Globbing for Multiple Files. You can query several files in the archive at once by using a glob pattern:
SELECT * FROM 'zip://path/to/archive.zip/*.csv';
This reads every .csv file inside the specified ZIP archive and returns their rows combined in a single result.