Home¶

8 de julho de 2025
2 min de leitura

Python x DuckDB

# Query the in-memory DataFrame `df` with DuckDB SQL and get a DataFrame back.
distinct_municipios_sql = "SELECT DISTINCT MUNICIPIO_NOME_3 FROM df ORDER BY 1"
results = duckdb.sql(distinct_municipios_sql).df()

# Run the query directly against the CSV file (no DataFrame is loaded first).
sql_query = f"""
SELECT
    COUNT(*) as row_count
FROM read_csv('{in_file}')  
"""

# The context manager closes the connection when the block exits, including
# when the query raises; the original snippet never closed this connection.
with duckdb.connect() as conn:
    result = conn.execute(sql_query).df()
result.head()


# Run the query directly against the CSV file (no DataFrame is loaded first).
sql_query = f"""
SELECT DISTINCT MUNICIPIO_NOME_3
FROM read_csv('{in_file}')  
ORDER BY 1
"""
# The context manager closes the connection even if the query raises; an
# explicit close() after the query would be skipped in that case.
with duckdb.connect() as conn:
    result = conn.execute(sql_query).df()
result.head()



# How to persist data in a DuckDB database file.
# In this example the rows are read straight from the DataFrame `df`.
# The context manager closes the connection even if the statement raises, so
# the database file is never left open by a failed run.
# NOTE(review): a plain CREATE TABLE is expected to fail when votacao_ce
# already exists; consider CREATE OR REPLACE TABLE if reruns are intended.
with duckdb.connect(out_file_votacao_datawarehouse) as con:
    con.sql("CREATE TABLE votacao_ce AS SELECT * FROM df")



# Read the data back from the DuckDB database file.
# The result is materialized as a DataFrame inside the block, so it remains
# usable after the connection is closed; the context manager also closes the
# connection when the query raises.
with duckdb.connect(out_file_votacao_datawarehouse) as con:
    duck_df = con.sql("SELECT DISTINCT MUNICIPIO_NOME_3 FROM votacao_ce ORDER BY 1 LIMIT 10").df()

duck_df.head()



# Export the `results` DataFrame to a CSV file, leaving out the index column.
results.to_csv(out_file_votacao_dep_fed, index=False)




# Write each report DataFrame to its own sheet of a single Excel workbook.
with pd.ExcelWriter(out_file_report) as writer:
    report_sheets = {
        "votacao_deputado": votacao_deputado,
        "votacao_cidade": votacao_cidade,
    }
    for sheet_name, frame in report_sheets.items():
        frame.to_excel(writer, sheet_name=sheet_name)

Para ler arquivos zipados com DuckDB

Steps to Read Files from ZIP Archives

1. Install and load the zipfs extension. To enable ZIP file reading, install and load the zipfs extension:

-- Install zipfs from the community extension repository, then load it.
INSTALL zipfs FROM community;
LOAD zipfs;

2. Query files inside a ZIP archive. Use the zip:// URL scheme to specify the ZIP file and the file path within it:

-- Read one file (filename.csv) stored inside the archive via the zip:// scheme.
SELECT * FROM 'zip://path/to/archive.zip/filename.csv';
Replace path/to/archive.zip with the path to your ZIP file.

Replace filename.csv with the name of the file inside the archive.

3. Globbing for multiple files. You can query multiple files using glob patterns:

-- The *.csv glob selects every .csv file inside the archive.
SELECT * FROM 'zip://path/to/archive.zip/*.csv';
This retrieves all .csv files within the specified ZIP archive.

27 de fevereiro de 2025
1 min de leitura

Atualização condicional

Estratégia 1

/* Copy BASE_PAINEL_MPE_PONTO_BB_IND into BASE_PAINEL_MPE_PONTO_BB_IND2,
   adding 1 to quantidade on rows with periodo 202412 and nome_acesso
   'Painel PJ'. All other rows are copied unchanged. */
data BASE_PAINEL_MPE_PONTO_BB_IND2;
    set BASE_PAINEL_MPE_PONTO_BB_IND;
    /* No explicit OUTPUT: the implicit output at the end of each iteration
       writes every row exactly once, whether or not it was incremented. */
    if periodo = 202412 and nome_acesso = 'Painel PJ' then
        quantidade = quantidade + 1;
run;



/* Append the PAINELPJ_ACESSOS rows to BASE_PAINEL_MPE_PONTO_BB_IND2, but only
   when BASE_PAINEL_MPE_PONTO_BB_IND (the source table, not the _IND2 copy)
   has no row for periodo 202412 / nome_acesso 'Painel PJ'. */
proc sql;
    insert into BASE_PAINEL_MPE_PONTO_BB_IND2 (periodo, nome_acesso, quantidade)
        select novo.periodo, novo.nome_acesso, novo.quantidade
        from PAINELPJ_ACESSOS as novo
        where not exists (
            select 1
            from BASE_PAINEL_MPE_PONTO_BB_IND as base
            where base.periodo = 202412
              and base.nome_acesso = 'Painel PJ'
        );
quit;

Estratégia 2

/* Update-or-insert inside one PROC SQL step: try the update first, then
   insert a row only when the update touched nothing. */
proc sql;
    /* The subqueries are used as single values — assumes PAINELPJ_ACESSOS
       holds exactly one row; TODO confirm. */
    update BASE_PAINEL_MPE_PONTO_BB_IND2
    set quantidade = (select quantidade from PAINELPJ_ACESSOS)
    where periodo = 202412 
    and nome_acesso = (select nome_acesso from PAINELPJ_ACESSOS);

    /* Capture the row count of the update; this must stay immediately after
       the UPDATE statement so that &sqlobs still refers to it. */
    %let rc = &sqlobs;

    /* If no row was updated, insert a new row. */
    /* NOTE(review): the inserted values are hard-coded literals rather than
       read from PAINELPJ_ACESSOS — confirm this is intended. */
    %if &rc = 0 %then %do;
        insert into BASE_PAINEL_MPE_PONTO_BB_IND2 (periodo, nome_acesso, quantidade)
        values (202412, 'Painel PJ', 123);
    %end;
quit;

Estratégia 3

/* Step 1: count the rows of BASE_PAINEL_MPE_PONTO_BB_IND2 whose periodo and
   nome_acesso match PAINELPJ_ACESSOS, and store the count in the macro
   variable record_exists.
   Assumes PAINELPJ_ACESSOS holds exactly one row, since the subqueries are
   used as single values — TODO confirm. */
proc sql noprint;
    select count(*)
        into :record_exists
        from BASE_PAINEL_MPE_PONTO_BB_IND2 as alvo
        where alvo.periodo     = (select src.periodo     from PAINELPJ_ACESSOS as src)
          and alvo.nome_acesso = (select src.nome_acesso from PAINELPJ_ACESSOS as src);
quit;
/* Write the resulting count to the log. */
%put &=record_exists;



/* If the record exists, update it. */
/* NOTE(review): the UPDATE statement below is commented out, so this branch
   currently runs an empty PROC SQL and changes nothing. The commented code
   also targets BASE_PAINEL_MPE_PONTO_BB_IND rather than the _IND2 table used
   by the insert branch — confirm the intended update before enabling it. */
%if &record_exists > 0 %then %do;
    proc sql;
/*        update BASE_PAINEL_MPE_PONTO_BB_IND*/
/*        set quantidade = 100*/
/*        where periodo = 202412 and nome_acesso = 'Painel PJ';*/
    quit;
%end;
/* Otherwise, insert a new record taken from PAINELPJ_ACESSOS. */
%else %do;
    proc sql;
        insert into BASE_PAINEL_MPE_PONTO_BB_IND2 (periodo, nome_acesso, quantidade)
        select periodo, nome_acesso, quantidade
        from PAINELPJ_ACESSOS
        /* The NOT EXISTS guard below is disabled; the record_exists check
           above decides whether this insert runs. */
/*      where not exists (*/
/*          select 1*/
/*          from BASE_PAINEL_MPE_PONTO_BB_IND2*/
/*          where periodo = (select periodo from PAINELPJ_ACESSOS)  */
/*          and nome_acesso = select nome_acesso from PAINELPJ_ACESSOS*/
/*      )*/
        ;
    quit;
%end;