Skip to content Skip to sidebar Skip to footer

Python - Pandas Script Too Slow With Lambda

My code works as I want, but it is very slow when I run this line: `newdf['Login'] = newdf['Site'].apply(lambda x : 'yes' if get(x).status_code == 200 else 'no')`. After com…

Solution 1:

This should be a much faster implementation of your functions:

import asyncio
from typing import Coroutine, List, Optional

import aiohttp
import pandas as pd
from pandas import DataFrame
from pandas.errors import EmptyDataError


def create_df_form_file() -> Optional[DataFrame]:
    """Load ``sites4.csv`` and build the frame of URLs to probe.

    The CSV is reduced to the ``Site`` and ``Login`` columns, and the literal
    text ``'Login'`` is appended to every ``Site`` value.

    Returns:
        The prepared DataFrame, or ``None`` when pandas reports the file as
        empty (the error is printed rather than raised).
    """
    try:
        # Only the read can raise EmptyDataError, so keep the try body minimal.
        site_list = pd.read_csv('sites4.csv', sep=',')
    except EmptyDataError as e:
        print(f'File Error: {e}')
        return None

    df = pd.DataFrame(site_list, columns=['Site', 'Login'])
    # map(str) guards against non-string cells before the concatenation.
    return df.assign(Site=df['Site'].map(str) + 'Login')


# Load the site list once at import time; the rest of the script reads it.
new_df: Optional[DataFrame] = create_df_form_file()

# create_df_form_file() returns None when the CSV is empty, so there is
# nothing to probe and the script stops with a non-zero exit status.
if not isinstance(new_df, DataFrame):
    print("empty data goodbye")
    # SystemExit is raised directly because the `exit` helper is injected by
    # the `site` module and is not guaranteed to exist in every interpreter.
    raise SystemExit(1)


# NOTE: Async Get Request
async def get_request(x_url: str) -> bool:
    """Issue a GET to ``x_url`` and report whether the response is OK.

    Args:
        x_url: The URL to request.

    Returns:
        The response's ``ok`` flag (per the aiohttp documentation this is
        true for status codes below 400).
    """
    # A session is opened per call and closed as soon as the response has
    # been inspected; both context managers release their resources on exit.
    async with aiohttp.ClientSession() as session:
        async with session.get(url=x_url) as result:
            return result.ok


# functions to test if site needs a login and if header contains json and to include Yes or No
def needs_login(result):
    """Return True when the response status is 403 or 406."""
    return result.status in (403, 406)


def is_json(result):
    """Return True when the response's Content-Type is ``application/json``.

    Parameters after ``;`` (for example ``charset=utf-8``) and letter case
    are ignored; a missing header counts as not JSON.
    """
    content_type = result.headers.get('Content-Type') or ''
    return content_type.split(';', 1)[0].strip().lower() == 'application/json'


async def yes_no(x):
    """Return ``'yes'`` when ``get_request(x)`` is truthy, otherwise ``'no'``."""
    return 'yes' if await get_request(x) else 'no'


async def _do_work(site_column, _headers: dict) -> Optional[DataFrame]:
    """Probe one site and, if it looks like a login endpoint, record yes/no.

    Args:
        site_column: The URL to probe. ``get_results`` passes plain strings;
            a mapping or row exposing a ``'Site'`` key is also accepted.
        _headers: Request headers sent with the probe.

    Returns:
        A one-row DataFrame with ``Site`` and ``login`` columns when the
        response is JSON or has a login-required status, otherwise ``None``.
    """
    url = site_column if isinstance(site_column, str) else site_column['Site']

    async with aiohttp.ClientSession() as session:
        async with session.get(url, headers=_headers) as result:
            if not (is_json(result) or needs_login(result)):
                return None
            print(url + ' é Login')
            print(result)

    # Appending Yes No -- awaited here so the stored value is the final
    # string rather than an un-awaited coroutine object.
    login = await yes_no(url)
    return DataFrame({'Site': [url], 'login': [login]})


def get_results():
    """Probe every URL in ``new_df['Site']`` concurrently and print the results.

    Exits with status 1 when the ``Site`` column is missing.
    """
    headers = {'Content-Type': 'application/json'}
    try:
        sites = new_df['Site']
    except KeyError:
        print("please insure your input file is accurate")
        raise SystemExit(1)

    async def _run_all():
        # gather() wraps all probes into the single awaitable the event loop
        # needs; return_exceptions=True keeps one unreachable site from
        # discarding the results of every other site.
        return await asyncio.gather(
            *(_do_work(site, _headers=headers) for site in sites),
            return_exceptions=True,
        )

    # asyncio.run creates and closes the event loop itself.
    results = asyncio.run(_run_all())
    print(results)


if __name__ == '__main__':
    get_results()

For more information on Python programming, please visit my tutorial site here.

Solution 2:

It is slow because `get` is a function from the `requests` package.

Since `requests` is not an asynchronous package, all other code is blocked until each request has completed.

If you use asyncio and aiohttp, you can improve it.

Post a Comment for "Python - Pandas Script Too Slow With Lambda"