
Hello all,

 

I need to download all cards from a Pipe, and I have over 1,000 cards. My current Python process uses pagination. The first 50 cards are queried and downloaded quickly (the JSON objects are saved practically immediately), but every subsequent card takes nearly 1 second, so the full download takes a long time. Is there a Python approach that downloads many cards more quickly?

 

Here’s my current process:
 

# packages
import json
import requests
import pipefy  # needed for the Pipefy client instantiated below

# token
pipefy_token = "token goes here"

# pipefy class for later use
Pipefy = pipefy.Pipefy(pipefy_token)

# graphql
url = "https://api.pipefy.com/graphql"

# headers
headers = {
    "Accept": "application/json",
    "Content-Type": "application/json",
    "Authorization": "Bearer %s" % pipefy_token
}

# pipe ID
pipe_id = "#########"

# first 50 cards query
json_query = {"query": "{ allCards(pipeId: %(pipeId)s, first: 50) { edges { node { id title assignees { id } comments { text } comments_count current_phase { name } done due_date fields { name value } labels { name } phases_history { phase { name } firstTimeIn lastTimeOut } url } } pageInfo { endCursor startCursor hasNextPage } } }" % {"pipeId": pipe_id}}

# response
response = requests.request("POST", url = url, json = json_query, headers = headers)

# json response
json_response_all_cards = json.loads(response.text)

# page info
page_info = json_response_all_cards["data"]["allCards"]["pageInfo"]

# output variable and variable recording whether last page has been queried
n_unique_card_ids_before = len(set([node["node"]["id"] for node in json_response_all_cards["data"]["allCards"]["edges"]]))
n_unique_card_ids_now = 0

# loop until a page of results adds no new unique card ids (last page reached)
while n_unique_card_ids_before != n_unique_card_ids_now:
    
    # count unique card ids
    unique_card_ids_before = set([node["node"]["id"] for node in json_response_all_cards["data"]["allCards"]["edges"]])
    n_unique_card_ids_before = len(unique_card_ids_before)
        
    # query page of results
    json_query = {"query": "{ allCards(pipeId: %(pipeId)s, first: 50, after: \"%(startCursor)s\") { edges { node { id title assignees { id } comments { text } comments_count current_phase { name } done due_date fields { name value } labels { name } phases_history { phase { name } firstTimeIn lastTimeOut } url } } pageInfo { endCursor startCursor hasNextPage } } }" % {"pipeId": pipe_id, "startCursor": page_info["startCursor"]}}
    
    # json
    json_response_some_cards = json.loads(requests.request("POST", url = url, json = json_query, headers = headers).text)
    
    # append
    json_response_all_cards["data"]["allCards"]["edges"] += [node_i for node_i in json_response_some_cards["data"]["allCards"]["edges"] if node_i["node"]["id"] not in unique_card_ids_before]
        
    # new page info
    page_info = json_response_some_cards["data"]["allCards"]["pageInfo"]
    
    # count unique card ids
    n_unique_card_ids_now = len(set([node["node"]["id"] for node in json_response_all_cards["data"]["allCards"]["edges"]]))
    
    # print unique cards
    print("Currently %i unique card ids" % n_unique_card_ids_now)

# see it
json_response_all_cards

 

Thank you for any help in advance!

Nick

@nicholas-michalak 

 

I use this code and it is very efficient:

 

import requests
import pandas as pd
import json

token = 'XXXXXXX'

url = "https://api.pipefy.com/graphql"

headers = {
    "authorization": f"Bearer {token}",
    "content-type": "application/json"
}

has_next_page = True
first_query = True
pipe_id = "XXXXXX"
records = []

while has_next_page:
    if first_query:
        payload = {"query": "{ allCards (pipeId:\"" + pipe_id + "\") { edges { node { id title fields { name report_value updated_at value } } } pageInfo {endCursor hasNextPage}}}"}
        first_query = False
    else:
        payload = {"query": "{ allCards (pipeId:\"" + pipe_id + "\", after:\"" + end_cursor + "\") { edges { node { id title fields { name report_value updated_at value } } } pageInfo {endCursor hasNextPage}}}"}

    response = requests.request("POST", url, json=payload, headers=headers)
    json_data = json.loads(response.text)
    end_cursor = json_data["data"]["allCards"]["pageInfo"]["endCursor"]  # cursor for the next page
    has_next_page = json_data["data"]["allCards"]["pageInfo"]["hasNextPage"]
    total_records_pg = len(json_data["data"]["allCards"]["edges"])
    for i in range(total_records_pg):
        card_title = json_data["data"]["allCards"]["edges"][i]["node"]["title"]
        card_data_d = json_data["data"]["allCards"]["edges"][i]["node"]["fields"]
        card_data = {x['name']: x['value'] for x in card_data_d}
        records.append(card_data)

# build the DataFrame in one pass (DataFrame.append was removed in pandas 2.0)
records_df = pd.DataFrame(records)

records_df.info()

df = records_df

df.columns = df.columns.str.replace(' ', '_')
df.columns = df.columns.str.replace('ã', 'a')
df.columns = df.columns.str.replace('Á', 'A')
df.columns = df.columns.str.replace('é', 'e')
df.columns = df.columns.str.replace('ê', 'e')
df.columns = df.columns.str.replace('á', 'a')
df.columns = df.columns.str.replace('ç', 'c')
df.columns = df.columns.str.replace('í', 'i')
df.columns = df.columns.str.replace('ú', 'u')
df.columns = df.columns.str.replace('õ', 'o')
df.columns = df.columns.str.replace('ó', 'o')
df.columns = df.columns.str.replace('.', '', regex=False)

df = df.reset_index(drop=True)

You can skip the accent handling if you want; I do it so I can send the data to AWS.
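For reference, the same accent stripping can be done in one pass with Python's built-in unicodedata module instead of one replace per character (a minimal sketch, assuming you only need ASCII-safe column names; the to_ascii helper is just illustrative):

import unicodedata

def to_ascii(name):
    # NFKD splits accented characters into a base character plus a combining mark,
    # and encode(..., "ignore") then drops the marks
    ascii_name = unicodedata.normalize('NFKD', name).encode('ascii', 'ignore').decode('ascii')
    return ascii_name.replace(' ', '_').replace('.', '')

df.columns = [to_ascii(c) for c in df.columns]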

 

16 seconds for 555 cards :)

 

 


As you might have seen from your output, this script only ever loads one new unique card per iteration of the while loop. The problem is that you are using the “startCursor” instead of the “endCursor”. Changing this should improve the performance by a lot! :blush:

 

The affected line is around line 48 of your script, which should be changed to:

json_query = {"query": "{ allCards(pipeId: %(pipeId)s, first: 50, after: \"%(endCursor)s\") { edges { node { id title assignees { id } comments { text } comments_count current_phase { name } done due_date fields { name value } labels { name } phases_history { phase { name } firstTimeIn lastTimeOut } url } } pageInfo { endCursor startCursor hasNextPage } } }" % {"pipeId": pipe_id, "endCursor": page_info["endCursor"]}}
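With endCursor in place, you can also drop the unique-id bookkeeping and simply loop on hasNextPage, which is what cursor pagination is designed for. A minimal sketch of that loop (same url, headers, and pipe_id as in your script; the node fields are trimmed here for brevity, and the variable names are just illustrative):

all_edges = []
end_cursor = None
has_next_page = True
while has_next_page:
    # only add the after argument once we have a cursor from the previous page
    after_clause = ', after: "%s"' % end_cursor if end_cursor else ''
    json_query = {"query": "{ allCards(pipeId: %s, first: 50%s) { edges { node { id title } } pageInfo { endCursor hasNextPage } } }" % (pipe_id, after_clause)}
    all_cards = requests.post(url, json=json_query, headers=headers).json()["data"]["allCards"]
    all_edges += all_cards["edges"]
    end_cursor = all_cards["pageInfo"]["endCursor"]
    has_next_page = all_cards["pageInfo"]["hasNextPage"]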

 

