Skip to content

Generate

generate_code(file_path, file_name, extension, sep=None, prefix='df_')

This function returns a line of generated Python code that loads the file into memory using pandas.

Parameters:

Name Type Description Default
file_path str

Path of the file to read (it is passed as-is to the generated pandas call).

required
file_name str

File name. For Excel files this is the sheet name to load.

required
extension str

Extension of the file.

required
sep str | None

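Separator used in the plain file, given as one of the keywords "space", "tab", "semi_colon", "comma" or "pipe"; any other value falls back to a comma. Defaults to None.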

None
prefix str

Prefix to name the dataframes. Defaults to "df_".

'df_'
Source code in src/afes/generate.py
def generate_code(
    file_path: str,
    file_name: str,
    extension: str,
    sep: str | None = None,
    prefix: str = "df_",
) -> str:
    """Return one line of pandas code that loads a file into a DataFrame.

    Args:
        file_path (str): Path of the file to read. It is embedded in the
            generated ``pd.read_csv`` / ``pd.read_excel`` call.
        file_name (str): File name. For plain files the text before the first
            dot is used to name the variable; for Excel files the value is
            used as the sheet name.
        extension (str): Extension of the file (for example ".xlsx").
        sep (str | None, optional): Separator keyword for plain files: "space",
            "tab", "semi_colon", "comma" or "pipe". Any other value falls back
            to a comma. Defaults to None.
        prefix (str, optional): Prefix to name the dataframes. Defaults to "df_".

    Returns:
        str: A newline-terminated assignment statement, or an empty string
        when the extension is neither a plain format nor an Excel extension.
    """
    # Local import keeps this function self-contained.
    import re

    # Keyword -> actual separator character; repr() below re-escapes the tab
    # so the generated source still reads sep = '\t'.
    separators = {
        "space": " ",
        "tab": "\t",
        "semi_colon": ";",
        "comma": ",",
        "pipe": "|",
    }

    def to_identifier(text: str) -> str:
        # Replace every non-word character (spaces, hyphens, commas, dots,
        # parentheses, ...) so the result is usable in a variable name.
        return re.sub(r"\W", "_", text)

    # repr() produces a correctly quoted and escaped literal, so backslashes
    # (Windows paths) and embedded quotes cannot corrupt the generated code.
    path_literal = repr(str(file_path))

    if extension in PLAIN_FORMATS:
        df_name = to_identifier(file_name.split(".")[0])
        separator = separators.get(sep, ",")
        return f"{prefix}{df_name} = pd.read_csv({path_literal}, sep = {separator!r})\n"

    if extension in (".xlsx", ".xls"):
        # Variable name is "<workbook stem>_<sheet name>".
        workbook = Path(file_path).name.split(".")[0]
        excel_name = to_identifier(workbook + "_" + file_name)
        return (
            f"{prefix}{excel_name} = "
            f"pd.read_excel({path_literal}, sheet_name = {file_name!r})\n"
        )

    return ""

generate_pandas_code(df, verbose=True, python_file='code.txt')

This function receives the DataFrame generated by explore() and generates pandas code to read each file. It writes the scripts to the file given by python_file (code.txt by default). When verbose is True, the code is also printed to standard output.

Parameters:

Name Type Description Default
df DataFrame

DataFrame with description of the files.

required
verbose bool

Whether to also print the generated code to standard output. Optional; defaults to True.

True
python_file str

Name of the output file for the generated code. Optional; defaults to "code.txt".

'code.txt'
Source code in src/afes/generate.py
def generate_pandas_code(
    df: pd.DataFrame, verbose: bool = True, python_file: str = "code.txt"
) -> None:
    """Generate pandas loading code for every non-empty file described in `df`.

    This function receives the dataframe generated by `explore()`, builds one
    loading statement per row whose `rows` value is greater than zero, and
    writes the resulting script to `python_file`.

    Args:
        df (pd.DataFrame): DataFrame with description of the files. The rows
            are read through the `rows`, `path`, `name`, `extension` and
            `separator` fields.
        verbose (bool): [Optional (default: True)] flag to also print the
            generated code to the standard output.
        python_file (str): [Optional (default: "code.txt")] File name for output
            of the code generated.
    """
    print(f'Generating python code and saving it to "{python_file}"')

    # "name" is read with item access: Series.name is the row label, not the
    # value of the "name" column.
    statements = [
        generate_code(row.path, row["name"], row.extension, sep=row.separator)
        for _, row in tqdm(df.iterrows(), total=len(df))
        if row.rows > 0
    ]
    code = "import pandas as pd\n\n" + "".join(statements)

    # Explicit UTF-8: Python source defaults to UTF-8, and the generated
    # statements may embed non-ASCII paths that a locale codec cannot encode.
    with open(python_file, "w", encoding="utf-8") as f:
        f.write(code)

    if verbose:
        print("### Start of the code ###")
        print(code)
        print("### End of the code ###")

    print(f'\n"{python_file}" has the generated Python code to load the files.\n')