Python - Pandas Script Too Slow With Lambda
My code works as I wish, but it is very slow when I run this line: --- newdf['Login'] = newdf['Site'].apply(lambda x : 'yes' if get(x).status_code == 200 else 'no') --- After com
Solution 1:
This should be a much faster implementation of your functions:
import asyncio
import sys
from typing import Coroutine, List, Optional

import aiohttp
import pandas as pd
from pandas import DataFrame
from pandas.errors import EmptyDataError
def create_df_form_file(path: str = 'sites4.csv') -> Optional[DataFrame]:
    """Load the site list from a CSV file and build the login-page URLs.

    The CSV is read with a comma separator and reduced to the columns
    ``Site`` and ``Login`` (a column missing from the file is created
    empty).  The literal suffix ``'Login'`` is then appended to every
    ``Site`` value.

    Args:
        path: CSV file to read.  Defaults to ``'sites4.csv'``.

    Returns:
        The prepared frame, or ``None`` when the file is empty or does not
        exist (the reason is printed).
    """
    try:
        site_list = pd.read_csv(path, sep=',')
    except (EmptyDataError, FileNotFoundError) as e:
        # A missing file is reported the same way as an empty one so the
        # caller's ``None`` check covers both cases.
        print(f'File Error: {e}')
        return None
    df = pd.DataFrame(site_list, columns=['Site', 'Login'])
    return df.assign(Site=df['Site'].map(str) + 'Login')
# Load the site list once at start-up; get_results() reads this module-level
# frame.
new_df: Optional[DataFrame] = create_df_form_file()
if not isinstance(new_df, DataFrame):
    # create_df_form_file() returned None: there is nothing to check.
    print("empty data goodbye")
    # sys.exit rather than the interactive ``exit`` helper, which is only
    # installed by the ``site`` module.
    sys.exit(1)
# NOTE: Async Get Request
async def get_request(x_url: str) -> bool:
    """Issue a GET request to ``x_url`` and report whether it succeeded.

    Args:
        x_url: URL to request.

    Returns:
        The response's ``ok`` flag (aiohttp reports True for status codes
        below 400).  False when the request cannot be completed at all
        (client/connection error or timeout), so one dead site does not
        abort the checks running for the other sites.
    """
    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(url=x_url) as result:
                return result.ok
    except (aiohttp.ClientError, asyncio.TimeoutError):
        return False
# functions to test if site needs a login and if header contains json and to include Yes or No
def needs_login(result) -> bool:
    """Return True when the response status is 406 or 403."""
    return result.status in (406, 403)


def is_json(result) -> bool:
    """Return True when the response's Content-Type is ``application/json``.

    Only the media type is compared, so a header carrying parameters such
    as ``application/json; charset=utf-8`` also matches.  A missing header
    yields False.
    """
    content_type = result.headers.get('Content-Type') or ''
    return content_type.split(';', 1)[0].strip().lower() == 'application/json'


async def yes_no(x: str) -> str:
    """Return 'yes' when ``get_request(x)`` is truthy, otherwise 'no'."""
    return 'yes' if await get_request(x) else 'no'


async def _do_work(site_column, _headers: dict) -> Optional[pd.Series]:
    """Probe one site and record the yes/no answer on its row.

    Args:
        site_column: Mapping-like row (get_results() passes one pandas
            Series per frame row) whose ``'Site'`` entry is the URL.
        _headers: Request headers sent with the GET.

    Returns:
        The row with its ``'Login'`` entry set to 'yes' or 'no' when the
        response is JSON or has status 406/403; ``None`` otherwise.
    """
    async with aiohttp.ClientSession() as session:
        async with session.get(site_column['Site'], headers=_headers) as result:
            if is_json(result) or needs_login(result):
                # Appending Yes No.  yes_no() is a coroutine function, so it
                # must be awaited; the answer goes into the frame's existing
                # 'Login' column.
                site_column['Login'] = await yes_no(site_column['Site'])
                print(site_column['Site'] + ' é Login')
                print(result)
                return site_column
    return None


async def _run_all(coros: List[Coroutine]) -> list:
    """Await every coroutine concurrently; results keep the input order."""
    return await asyncio.gather(*coros)


def get_results():
    """Check every site in the module-level ``new_df`` and print the results.

    One ``_do_work`` coroutine is created per row and all of them are run
    concurrently.  Exits with status 1 when the frame has no ``'Site'``
    column.
    """
    headers = {'Content-Type': 'application/json'}
    if 'Site' not in new_df.columns:
        print("please insure your input file is accurate")
        sys.exit(1)
    # iterrows() yields one Series per row, which is the object _do_work
    # indexes with ['Site'] (iterating new_df['Site'] would hand it bare
    # strings).
    _coro: List[Coroutine] = [
        _do_work(row, _headers=headers) for _, row in new_df.iterrows()
    ]
    # run_until_complete() accepts a single awaitable, so the coroutines
    # are combined with gather() and driven by asyncio.run().
    results = asyncio.run(_run_all(_coro))
    print(results)
# Run the checks only when executed as a script, not when imported.
if __name__ == '__main__':
    get_results()
For more information on Python programming, please visit my tutorial site here.
Post a Comment for "Python - Pandas Script Too Slow With Lambda"