diff --git a/.coveragerc b/.coveragerc index 9356d3014..7c199362c 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,15 +1,15 @@ # configuration file used by run_coverage.py [run] branch = True -source = hpat +source = sdc concurrency = multiprocessing parallel = True [report] omit = - hpat/ml/* - hpat/xenon_ext.py - hpat/ros.py - hpat/cv_ext.py - hpat/tests/* + sdc/ml/* + sdc/xenon_ext.py + sdc/ros.py + sdc/cv_ext.py + sdc/tests/* diff --git a/buildscripts/build.py b/buildscripts/build.py index 957edf103..4ca78ede1 100644 --- a/buildscripts/build.py +++ b/buildscripts/build.py @@ -17,7 +17,7 @@ def run_smoke_tests(sdc_src, test_env_activate): sdc_pi_example = os.path.join(sdc_src, 'buildscripts', 'sdc_pi_example.py') - run_command(f'{test_env_activate} && python -c "import hpat"') + run_command(f'{test_env_activate} && python -c "import sdc"') run_command(f'{test_env_activate} && python {sdc_pi_example}') @@ -104,13 +104,13 @@ def run_smoke_tests(sdc_src, test_env_activate): os.chdir(../sdc_src) is a workaround for the following error: Traceback (most recent call last): File "", line 1, in - File "hpat/hpat/__init__.py", line 9, in - import hpat.dict_ext - File "hpat/hpat/dict_ext.py", line 12, in - from hpat.str_ext import string_type, gen_unicode_to_std_str, gen_std_str_to_unicode - File "hpat/hpat/str_ext.py", line 18, in + File "sdc/sdc/__init__.py", line 9, in + import sdc.dict_ext + File "sdc/sdc/dict_ext.py", line 12, in + from sdc.str_ext import string_type, gen_unicode_to_std_str, gen_std_str_to_unicode + File "sdc/sdc/str_ext.py", line 18, in from . 
import hstr_ext - ImportError: cannot import name 'hstr_ext' from 'hpat' (hpat/hpat/__init__.py) + ImportError: cannot import name 'hstr_ext' from 'sdc' (sdc/sdc/__init__.py) """ os.chdir(os.path.dirname(sdc_src)) diff --git a/buildscripts/sdc-conda-recipe/meta.yaml b/buildscripts/sdc-conda-recipe/meta.yaml index bc748b191..909711fc8 100644 --- a/buildscripts/sdc-conda-recipe/meta.yaml +++ b/buildscripts/sdc-conda-recipe/meta.yaml @@ -4,7 +4,7 @@ {% set ARROW_CPP_VERSION = "==0.15.0" %} package: - name: hpat + name: sdc version: {{ GIT_DESCRIBE_TAG }} source: @@ -58,13 +58,13 @@ test: - h5py - scipy imports: - - hpat + - sdc outputs: - type: conda - name: hpat + name: sdc - type: wheel - name: hpat + name: sdc requirements: build: - {{ compiler('c') }} diff --git a/buildscripts/sdc-conda-recipe/run_test.bat b/buildscripts/sdc-conda-recipe/run_test.bat index 93f6752c0..e1b39ab3d 100644 --- a/buildscripts/sdc-conda-recipe/run_test.bat +++ b/buildscripts/sdc-conda-recipe/run_test.bat @@ -4,15 +4,15 @@ set NUMBA_DEVELOPER_MODE=1 set NUMBA_DISABLE_ERROR_MESSAGE_HIGHLIGHTING=1 set PYTHONFAULTHANDLER=1 -python -m hpat.tests.gen_test_data +python -m sdc.tests.gen_test_data if errorlevel 1 exit 1 @rem TODO investigate root cause of NumbaPerformanceWarning @rem http://numba.pydata.org/numba-doc/latest/user/parallel.html#diagnostics IF "%SDC_NP_MPI%" == "" ( - python -W ignore -u -m hpat.runtests -v + python -W ignore -u -m sdc.runtests -v ) ELSE ( - mpiexec -localonly -n %SDC_NP_MPI% python -W ignore -u -m hpat.runtests -v) + mpiexec -localonly -n %SDC_NP_MPI% python -W ignore -u -m sdc.runtests -v) if errorlevel 1 exit 1 REM Link check for Documentation using Sphinx's in-built linkchecker diff --git a/buildscripts/sdc-conda-recipe/run_test.sh b/buildscripts/sdc-conda-recipe/run_test.sh index 0059c6e3b..54716dd15 100644 --- a/buildscripts/sdc-conda-recipe/run_test.sh +++ b/buildscripts/sdc-conda-recipe/run_test.sh @@ -6,7 +6,7 @@ export NUMBA_DEVELOPER_MODE=1 export 
NUMBA_DISABLE_ERROR_MESSAGE_HIGHLIGHTING=1 export PYTHONFAULTHANDLER=1 -python -m hpat.tests.gen_test_data +python -m sdc.tests.gen_test_data #Link check for Documentation using Sphinx's in-built linkchecker #sphinx-build -b linkcheck -j1 usersource _build/html @@ -14,7 +14,7 @@ python -m hpat.tests.gen_test_data # TODO investigate root cause of NumbaPerformanceWarning # http://numba.pydata.org/numba-doc/latest/user/parallel.html#diagnostics if [ -z "$SDC_NP_MPI" ]; then - python -W ignore -u -m hpat.runtests -v + python -W ignore -u -m sdc.runtests -v else - mpiexec -n $SDC_NP_MPI python -W ignore -u -m hpat.runtests -v + mpiexec -n $SDC_NP_MPI python -W ignore -u -m sdc.runtests -v fi diff --git a/buildscripts/sdc_pi_example.py b/buildscripts/sdc_pi_example.py index abb12f3be..23dd093b0 100644 --- a/buildscripts/sdc_pi_example.py +++ b/buildscripts/sdc_pi_example.py @@ -1,9 +1,9 @@ -import hpat +import sdc import numpy as np import time -@hpat.jit +@sdc.jit def calc_pi(n): t1 = time.time() x = 2 * np.random.ranf(n) - 1 diff --git a/buildscripts/test.py b/buildscripts/test.py index dc4138e05..b6026435a 100644 --- a/buildscripts/test.py +++ b/buildscripts/test.py @@ -73,7 +73,7 @@ os.environ['PYTHONPATH'] = '.' 
os.environ['HDF5_DIR'] = conda_prefix try: - run_command(f'{develop_env_activate} && python -m hpat.tests.gen_test_data && coverage erase && coverage run -m hpat.runtests && coveralls -v') + run_command(f'{develop_env_activate} && python -m sdc.tests.gen_test_data && coverage erase && coverage run -m sdc.runtests && coveralls -v') except: format_print('Coverage fails') print(traceback.format_exc()) @@ -85,13 +85,13 @@ os.chdir(../sdc_src) is a workaround for the following error: Traceback (most recent call last): File "", line 1, in - File "hpat/hpat/__init__.py", line 9, in - import hpat.dict_ext - File "hpat/hpat/dict_ext.py", line 12, in - from hpat.str_ext import string_type, gen_unicode_to_std_str, gen_std_str_to_unicode - File "hpat/hpat/str_ext.py", line 18, in + File "sdc/sdc/__init__.py", line 9, in + import sdc.dict_ext + File "sdc/sdc/dict_ext.py", line 12, in + from sdc.str_ext import string_type, gen_unicode_to_std_str, gen_std_str_to_unicode + File "sdc/sdc/str_ext.py", line 18, in from . import hstr_ext - ImportError: cannot import name 'hstr_ext' from 'hpat' (hpat/hpat/__init__.py) + ImportError: cannot import name 'hstr_ext' from 'sdc' (sdc/sdc/__init__.py) """ os.chdir(os.path.dirname(sdc_src)) run_command(f'{develop_env_activate} && {test_script}') @@ -119,13 +119,13 @@ os.chdir(../sdc_src) is a workaround for the following error: Traceback (most recent call last): File "", line 1, in - File "hpat/hpat/__init__.py", line 9, in - import hpat.dict_ext - File "hpat/hpat/dict_ext.py", line 12, in - from hpat.str_ext import string_type, gen_unicode_to_std_str, gen_std_str_to_unicode - File "hpat/hpat/str_ext.py", line 18, in + File "sdc/sdc/__init__.py", line 9, in + import sdc.dict_ext + File "sdc/sdc/dict_ext.py", line 12, in + from sdc.str_ext import string_type, gen_unicode_to_std_str, gen_std_str_to_unicode + File "sdc/sdc/str_ext.py", line 18, in from . 
import hstr_ext - ImportError: cannot import name 'hstr_ext' from 'hpat' (hpat/hpat/__init__.py) + ImportError: cannot import name 'hstr_ext' from 'sdc' (sdc/sdc/__init__.py) """ os.chdir(os.path.dirname(sdc_src)) sdc_packages = get_sdc_build_packages(build_folder) diff --git a/buildscripts/utilities.py b/buildscripts/utilities.py index 533dd516b..9021c47e3 100644 --- a/buildscripts/utilities.py +++ b/buildscripts/utilities.py @@ -76,7 +76,7 @@ def get_sdc_build_packages(build_output): sdc_build_dir = os.path.join(build_output, os_dir) for item in os.listdir(sdc_build_dir): item_path = os.path.join(sdc_build_dir, item) - if os.path.isfile(item_path) and re.search(r'^hpat.*\.tar\.bz2$|^hpat.*\.whl$', item): + if os.path.isfile(item_path) and re.search(r'^sdc.*\.tar\.bz2$|^sdc.*\.whl$', item): sdc_packages.append(item_path) return sdc_packages diff --git a/docs/source/conf.py b/docs/source/conf.py index a2408479e..4a4ea2e8b 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -51,9 +51,9 @@ if not sdc_doc_no_api_ref: try: - import hpat # TODO: Rename hpat module name to sdc + import sdc # TODO: Rename hpat module name to sdc except ImportError: - print('IMPORT EXCEPTION: Cannot import hpat. ') + print('IMPORT EXCEPTION: Cannot import sdc. ') print('Documentation generator for API Reference for a given module expects that module ' 'to be installed. Use conda/pip install hpat to install it prior to using API Reference generation') print('If you want to disable API Reference generation, set the environment variable SDC_DOC_NO_API_REF=1') @@ -214,7 +214,7 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). 
man_pages = [ - (master_doc, 'hpat', 'HPAT Documentation', + (master_doc, 'sdc', 'HPAT Documentation', [author], 1) ] diff --git a/docs/source/sdc2pd_name.py b/docs/source/sdc2pd_name.py index b7e20cbbc..d5134b899 100644 --- a/docs/source/sdc2pd_name.py +++ b/docs/source/sdc2pd_name.py @@ -26,7 +26,7 @@ import os import glob -import hpat # TODO: Rename hpat module name to sdc +import sdc # TODO: Rename hpat module name to sdc # ***************************************************************************************************** # *** PARSER CONFIGURATION *** @@ -139,7 +139,7 @@ def parse_file(fname): # Get path to SDC module installed -sdc_path = os.path.dirname(hpat.__file__) # TODO: Change hpat to sdc +sdc_path = os.path.dirname(sdc.__file__) # TODO: Change hpat to sdc sdc_datatypes_path = os.path.join(sdc_path, "datatypes") sdc_datatypes_pathname_mask = os.path.join(sdc_datatypes_path, "*.py") diff --git a/examples/Basic_DataFrame.py b/examples/Basic_DataFrame.py index 3516f81f4..79d18d52e 100644 --- a/examples/Basic_DataFrame.py +++ b/examples/Basic_DataFrame.py @@ -27,10 +27,10 @@ import pandas as pd import numpy as np -import hpat +import sdc -@hpat.jit +@sdc.jit def merge_df(n): df1 = pd.DataFrame({'key1': np.arange(n), 'A': np.arange(n) + 1.0}) df2 = pd.DataFrame({'key2': n - np.arange(n), 'B': n + np.arange(n) + 1.0}) @@ -38,7 +38,7 @@ def merge_df(n): return df3 -@hpat.jit +@sdc.jit def concat_df(n): df1 = pd.DataFrame({'key1': np.arange(n), 'A': np.arange(n) + 1.0}) df2 = pd.DataFrame({'key2': n - np.arange(n), 'A': n + np.arange(n) + 2.0}) diff --git a/examples/accel_example.py b/examples/accel_example.py index 602b208e2..cc114f728 100644 --- a/examples/accel_example.py +++ b/examples/accel_example.py @@ -28,10 +28,10 @@ import numpy as np import time import pandas as pd -import hpat +import sdc -@hpat.jit +@sdc.jit def accel_infer(n): t1 = time.time() diff --git a/examples/d4p_kmeans.py b/examples/d4p_kmeans.py index bc5889774..e221f59af 100644 --- 
a/examples/d4p_kmeans.py +++ b/examples/d4p_kmeans.py @@ -27,11 +27,11 @@ import daal4py import daal4py.hpat -import hpat +import sdc import numpy as np -@hpat.jit(nopython=True) +@sdc.jit(nopython=True) def kmeans(N, D, nClusters, maxit): a = np.random.ranf((N, D)) # doesn't make much sense, but ok for now kmi = daal4py.kmeans_init(nClusters, method='plusPlusDense') @@ -42,4 +42,4 @@ def kmeans(N, D, nClusters, maxit): print(kmeans(10000, 20, 2, 30)) -hpat.distribution_report() +sdc.distribution_report() diff --git a/examples/d4p_linreg.py b/examples/d4p_linreg.py index f9d9304c1..ad49b50ac 100644 --- a/examples/d4p_linreg.py +++ b/examples/d4p_linreg.py @@ -27,17 +27,17 @@ import daal4py import daal4py.hpat -import hpat +import sdc import numpy as np -@hpat.jit +@sdc.jit def lr_predict(N, D, model): data = np.random.ranf((N / 2, D)) return daal4py.linear_regression_prediction().compute(data, model) -@hpat.jit +@sdc.jit def lr_train(N, D): data = np.random.ranf((N, D)) gt = np.random.ranf((N, 2)) @@ -49,4 +49,4 @@ def lr_train(N, D): print(p_res.prediction[0], t_res.model.NumberOfBetas) -hpat.distribution_report() +sdc.distribution_report() diff --git a/examples/hiframes_concat.py b/examples/hiframes_concat.py index 1cc7dc0b4..a602740c6 100644 --- a/examples/hiframes_concat.py +++ b/examples/hiframes_concat.py @@ -27,10 +27,10 @@ import pandas as pd import numpy as np -import hpat +import sdc -@hpat.jit +@sdc.jit def concat_df(n): df1 = pd.DataFrame({'key1': np.arange(n), 'A': np.arange(n) + 1.0}) df2 = pd.DataFrame({'key2': n - np.arange(n), 'A': n + np.arange(n) + 1.0}) @@ -40,4 +40,4 @@ def concat_df(n): n = 10 print(concat_df(n)) -# hpat.distribution_report() +# sdc.distribution_report() diff --git a/examples/hiframes_cumsum.py b/examples/hiframes_cumsum.py index 2d9536567..5305d73ff 100644 --- a/examples/hiframes_cumsum.py +++ b/examples/hiframes_cumsum.py @@ -27,10 +27,10 @@ import pandas as pd import numpy as np -import hpat +import sdc -@hpat.jit +@sdc.jit 
def cumsum_df(n): df = pd.DataFrame({'A': np.arange(n) + 1.0, 'B': np.random.ranf(n)}) Ac = df.A.cumsum() @@ -39,4 +39,4 @@ def cumsum_df(n): n = 10 print(cumsum_df(n)) -hpat.distribution_report() +sdc.distribution_report() diff --git a/examples/hiframes_filter.py b/examples/hiframes_filter.py index 0a9e52230..949448f45 100644 --- a/examples/hiframes_filter.py +++ b/examples/hiframes_filter.py @@ -27,10 +27,10 @@ import pandas as pd import numpy as np -import hpat +import sdc -@hpat.jit +@sdc.jit def filter_df(n): df = pd.DataFrame({'A': np.random.ranf(n), 'B': np.random.ranf(n)}) df1 = df[df.A > .5] diff --git a/examples/hiframes_merge.py b/examples/hiframes_merge.py index 0de5e3a4c..184998e8c 100644 --- a/examples/hiframes_merge.py +++ b/examples/hiframes_merge.py @@ -27,10 +27,10 @@ import pandas as pd import numpy as np -import hpat +import sdc -@hpat.jit +@sdc.jit def merge_df(n): df1 = pd.DataFrame({'key1': np.arange(n), 'A': np.arange(n) + 1.0}) df2 = pd.DataFrame({'key2': n - np.arange(n), 'B': n + np.arange(n) + 1.0}) diff --git a/examples/hiframes_pivot.py b/examples/hiframes_pivot.py index 298345321..0a747ed20 100644 --- a/examples/hiframes_pivot.py +++ b/examples/hiframes_pivot.py @@ -27,10 +27,10 @@ import pandas as pd import numpy as np -import hpat +import sdc -@hpat.jit(pivots={'pt': ['small', 'large']}) +@sdc.jit(pivots={'pt': ['small', 'large']}) def df_pivot(df): pt = df.pivot_table(index='A', columns='C', values='D', aggfunc='sum') print(pt.small.values) diff --git a/examples/hiframes_rolling.py b/examples/hiframes_rolling.py index 5ead3ae7a..8deab886f 100644 --- a/examples/hiframes_rolling.py +++ b/examples/hiframes_rolling.py @@ -27,24 +27,24 @@ import pandas as pd import numpy as np -import hpat +import sdc -@hpat.jit +@sdc.jit def rolling_df1(n): df = pd.DataFrame({'A': np.arange(n), 'B': np.random.ranf(n)}) Ac = df.A.rolling(5).sum() return Ac.sum() -@hpat.jit +@sdc.jit def rolling_df2(n): df = pd.DataFrame({'A': np.arange(n), 'B': 
np.random.ranf(n)}) df['moving average'] = df.A.rolling(window=5, center=True).mean() return df['moving average'].sum() -@hpat.jit +@sdc.jit def rolling_df3(n): df = pd.DataFrame({'A': np.arange(n), 'B': np.random.ranf(n)}) Ac = df.A.rolling(3, center=True).apply(lambda a: a[0] + 2 * a[1] + a[2]) diff --git a/examples/hiframes_shift.py b/examples/hiframes_shift.py index 2c5e64d85..085b80475 100644 --- a/examples/hiframes_shift.py +++ b/examples/hiframes_shift.py @@ -27,17 +27,17 @@ import pandas as pd import numpy as np -import hpat +import sdc -@hpat.jit +@sdc.jit def shift_df1(n): df = pd.DataFrame({'A': np.arange(n) + 1.0, 'B': np.random.ranf(n)}) Ac = df.A.shift(1) return Ac.sum() -@hpat.jit +@sdc.jit def shift_df2(n): df = pd.DataFrame({'A': np.arange(n) + 1.0, 'B': np.random.ranf(n)}) Ac = df.A.pct_change() diff --git a/examples/hiframes_sort.py b/examples/hiframes_sort.py index f40127106..fd36e075a 100644 --- a/examples/hiframes_sort.py +++ b/examples/hiframes_sort.py @@ -27,10 +27,10 @@ import pandas as pd import numpy as np -import hpat +import sdc -@hpat.jit +@sdc.jit def df_sort(df): df2 = df.sort_values('A') print(df2.A.values) diff --git a/examples/intraday_mean.py b/examples/intraday_mean.py index 9c7835fb2..097bfecd7 100644 --- a/examples/intraday_mean.py +++ b/examples/intraday_mean.py @@ -29,16 +29,16 @@ import numpy as np import h5py import time -import hpat -from hpat import prange +import sdc +from sdc import prange # adopted from: # http://www.pythonforfinance.net/2017/02/20/intraday-stock-mean-reversion-trading-backtest-in-python/ -@hpat.jit(locals={'s_open': hpat.float64[:], 's_high': hpat.float64[:], - 's_low': hpat.float64[:], 's_close': hpat.float64[:], - 's_vol': hpat.float64[:]}) +@sdc.jit(locals={'s_open': sdc.float64[:], 's_high': sdc.float64[:], + 's_low': sdc.float64[:], 's_close': sdc.float64[:], + 's_vol': sdc.float64[:]}) def intraday_mean_revert(): file_name = "stock_data_all_google.hdf5" f = h5py.File(file_name, "r") diff --git 
a/examples/intraday_mean_rand.py b/examples/intraday_mean_rand.py index c922511f8..321c5e416 100644 --- a/examples/intraday_mean_rand.py +++ b/examples/intraday_mean_rand.py @@ -28,14 +28,14 @@ import pandas as pd import numpy as np import time -import hpat -from hpat import prange +import sdc +from sdc import prange # adopted from: # http://www.pythonforfinance.net/2017/02/20/intraday-stock-mean-reversion-trading-backtest-in-python/ -@hpat.jit +@sdc.jit def intraday_mean_revert(): nsyms = 1000 max_num_days = 80000 diff --git a/examples/k-means.py b/examples/k-means.py index ccffafa1c..e6c618835 100644 --- a/examples/k-means.py +++ b/examples/k-means.py @@ -30,10 +30,10 @@ import argparse import time import h5py -import hpat +import sdc -@hpat.jit +@sdc.jit def kmeans(numCenter, numIter): f = h5py.File("lr.hdf5", "r") A = f['points'][:] diff --git a/examples/kernel_density_estimation.py b/examples/kernel_density_estimation.py index d15811b89..0d96804e0 100644 --- a/examples/kernel_density_estimation.py +++ b/examples/kernel_density_estimation.py @@ -25,15 +25,15 @@ # ***************************************************************************** -import hpat -from hpat import prange +import sdc +from sdc import prange import numpy as np import h5py import argparse import time -@hpat.jit +@sdc.jit def kde(): f = h5py.File("kde.hdf5", "r") X = f['points'][:] diff --git a/examples/kernel_density_estimation_pq.py b/examples/kernel_density_estimation_pq.py index 2f3d47eac..4b927bad7 100644 --- a/examples/kernel_density_estimation_pq.py +++ b/examples/kernel_density_estimation_pq.py @@ -26,14 +26,14 @@ import pyarrow.parquet as pq -import hpat -from hpat import prange +import sdc +from sdc import prange import numpy as np import argparse import time -@hpat.jit +@sdc.jit def kde(): t = pq.read_table('kde.parquet') df = t.to_pandas() diff --git a/examples/kernel_density_estimation_pq_hdfs.py b/examples/kernel_density_estimation_pq_hdfs.py index 997aafd69..142167463 100644 
--- a/examples/kernel_density_estimation_pq_hdfs.py +++ b/examples/kernel_density_estimation_pq_hdfs.py @@ -26,14 +26,14 @@ import pyarrow.parquet as pq -import hpat -from hpat import prange +import sdc +from sdc import prange import numpy as np import argparse import time -@hpat.jit +@sdc.jit def kde(): t = pq.read_table('hdfs://localhost:9016/user/etotoni/kde.parquet') df = t.to_pandas() diff --git a/examples/linear_regression.py b/examples/linear_regression.py index f03b3d109..c42f5f726 100644 --- a/examples/linear_regression.py +++ b/examples/linear_regression.py @@ -25,14 +25,14 @@ # ***************************************************************************** -import hpat +import sdc import numpy as np import h5py import argparse import time -@hpat.jit +@sdc.jit def linear_regression(iterations): f = h5py.File("lir.hdf5", "r") X = f['points'][:] diff --git a/examples/logistic_regression.py b/examples/logistic_regression.py index 271d1d7b6..8d3d0ef51 100644 --- a/examples/logistic_regression.py +++ b/examples/logistic_regression.py @@ -25,14 +25,14 @@ # ***************************************************************************** -import hpat +import sdc import numpy as np import h5py import argparse import time -@hpat.jit +@sdc.jit def logistic_regression(iterations): f = h5py.File("lr.hdf5", "r") X = f['points'][:] diff --git a/examples/logistic_regression_rand.py b/examples/logistic_regression_rand.py index 151d82436..9013225fa 100644 --- a/examples/logistic_regression_rand.py +++ b/examples/logistic_regression_rand.py @@ -26,11 +26,11 @@ import numpy as np -import hpat +import sdc import time -@hpat.jit +@sdc.jit def logistic_regression(iterations): t1 = time.time() N = 10**8 diff --git a/examples/ml_mnb.py b/examples/ml_mnb.py index 1706aaa0d..2d6e68531 100644 --- a/examples/ml_mnb.py +++ b/examples/ml_mnb.py @@ -26,22 +26,22 @@ import numpy as np -import hpat -from hpat import prange -import hpat.ml +import sdc +from sdc import prange +import sdc.ml 
import time -#hpat.multithread_mode = True +#sdc.multithread_mode = True -@hpat.jit +@sdc.jit def f(N, D, M): X = np.random.randint(0, 5, size=(N, D)).astype(np.int32) y = np.empty(N, dtype=np.int32) for i in prange(N): y[i] = i % 4 p = np.random.randint(0, 5, size=(M, D)).astype(np.int32) - clf = hpat.ml.MultinomialNB(n_classes=4) + clf = sdc.ml.MultinomialNB(n_classes=4) t1 = time.time() clf.train(X, y) res = clf.predict(p) diff --git a/examples/ml_svc.py b/examples/ml_svc.py index 61e49345c..b77d0d44c 100644 --- a/examples/ml_svc.py +++ b/examples/ml_svc.py @@ -26,22 +26,22 @@ import numpy as np -import hpat -from hpat import prange -import hpat.ml +import sdc +from sdc import prange +import sdc.ml import time -hpat.multithread_mode = True +sdc.multithread_mode = True -@hpat.jit +@sdc.jit def f(N, D, M): X = np.random.ranf((N, D)) y = np.empty(N) for i in prange(N): y[i] = i % 4 p = np.random.ranf((M, D)) - clf = hpat.ml.SVC(n_classes=4) + clf = sdc.ml.SVC(n_classes=4) t1 = time.time() clf.train(X, y) res = clf.predict(p) diff --git a/examples/pi.py b/examples/pi.py index 1d942e65a..373b421ec 100644 --- a/examples/pi.py +++ b/examples/pi.py @@ -25,13 +25,13 @@ # ***************************************************************************** -import hpat +import sdc import numpy as np import argparse import time -@hpat.jit +@sdc.jit def calc_pi(n): t1 = time.time() x = 2 * np.random.ranf(n) - 1 diff --git a/examples/ros_example1.py b/examples/ros_example1.py index f1bae7517..c64ccf582 100644 --- a/examples/ros_example1.py +++ b/examples/ros_example1.py @@ -27,9 +27,9 @@ import numpy as np from math import sqrt -import hpat -import hpat.ros -from hpat import prange, stencil +import sdc +import sdc.ros +from sdc import prange, stencil import time @@ -42,10 +42,10 @@ def gaussian_blur(a): + a[-2, 2] * 0.003 + a[-1, 2] * 0.0133 + a[0, 2] * 0.0219 + a[1, 2] * 0.0133 + a[2, 2] * 0.0030) -@hpat.jit +@sdc.jit def read_example(): t1 = time.time() - A = 
hpat.ros.read_ros_images("image_test.bag") + A = sdc.ros.read_ros_images("image_test.bag") # crop out dashboard B = A[:, :-50, :, :] # intensity threshold @@ -84,4 +84,4 @@ def read_example(): print(read_example().sum()) -# hpat.distribution_report() +# sdc.distribution_report() diff --git a/examples/series_basic.py b/examples/series_basic.py index 5f5f34ccd..94f317d76 100644 --- a/examples/series_basic.py +++ b/examples/series_basic.py @@ -27,17 +27,17 @@ import pandas as pd import numpy as np -import hpat +import sdc -@hpat.jit +@sdc.jit def get_mean(df): ser = pd.Series(df['Bonus %']) m = ser.mean() return m -@hpat.jit +@sdc.jit def sort_name(df): ser = pd.Series(df['First Name']) m = ser.sort_values() diff --git a/generate_data/gen_kde.py b/generate_data/gen_kde.py index f70c08c0e..41215659d 100644 --- a/generate_data/gen_kde.py +++ b/generate_data/gen_kde.py @@ -29,10 +29,10 @@ import numpy as np import argparse import time -import hpat +import sdc -@hpat.jit +@sdc.jit def gen_kde(N, file_name): # np.random.seed(0) points = np.random.random(N) diff --git a/generate_data/gen_kde_pq.py b/generate_data/gen_kde_pq.py index 6f1231ef1..394c85dc4 100644 --- a/generate_data/gen_kde_pq.py +++ b/generate_data/gen_kde_pq.py @@ -31,7 +31,7 @@ import pandas as pd import argparse import time -import hpat +import sdc def gen_kde(N, file_name): diff --git a/generate_data/gen_linear_regression.py b/generate_data/gen_linear_regression.py index 4e6992b14..5c39ec8df 100644 --- a/generate_data/gen_linear_regression.py +++ b/generate_data/gen_linear_regression.py @@ -29,10 +29,10 @@ import numpy as np import argparse import time -import hpat +import sdc -@hpat.jit +@sdc.jit def gen_lir(N, D, p, file_name): # np.random.seed(0) points = np.random.random((N, D)) diff --git a/generate_data/gen_logistic_regression.py b/generate_data/gen_logistic_regression.py index edc1b42d5..6d8c88028 100644 --- a/generate_data/gen_logistic_regression.py +++ b/generate_data/gen_logistic_regression.py 
@@ -29,10 +29,10 @@ import numpy as np import argparse import time -import hpat +import sdc -@hpat.jit +@sdc.jit def gen_lir(N, D, file_name): # np.random.seed(0) points = np.random.random((N, D)) diff --git a/hpat/tests/tests_perf/__init__.py b/hpat/tests/tests_perf/__init__.py deleted file mode 100644 index d9f607973..000000000 --- a/hpat/tests/tests_perf/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from hpat.tests.tests_perf.test_perf_unicode import * -from hpat.tests.tests_perf.test_perf_series_str import * diff --git a/hpat/__init__.py b/sdc/__init__.py similarity index 67% rename from hpat/__init__.py rename to sdc/__init__.py index b63089fc6..3ebff01b3 100644 --- a/hpat/__init__.py +++ b/sdc/__init__.py @@ -33,29 +33,29 @@ stencil, threading_layer, jitclass, objmode) from numba.types import * -import hpat.dict_ext -import hpat.set_ext -from hpat.set_ext import init_set_string -import hpat.distributed_api -from hpat.distributed_api import dist_time +import sdc.dict_ext +import sdc.set_ext +from sdc.set_ext import init_set_string +import sdc.distributed_api +from sdc.distributed_api import dist_time # legacy for STAC A3, TODO: remove -from hpat.dict_ext import (DictIntInt, DictInt32Int32, dict_int_int_type, +from sdc.dict_ext import (DictIntInt, DictInt32Int32, dict_int_int_type, dict_int32_int32_type) -from hpat.str_ext import string_type -from hpat.str_arr_ext import string_array_type +from sdc.str_ext import string_type +from sdc.str_arr_ext import string_array_type from numba.types import List -from hpat.utils import cprint, distribution_report -import hpat.compiler -import hpat.io -import hpat.io.np_io -import hpat.hiframes.pd_timestamp_ext -import hpat.hiframes.boxing -import hpat.config -import hpat.timsort -from hpat.decorators import jit +from sdc.utils import cprint, distribution_report +import sdc.compiler +import sdc.io +import sdc.io.np_io +import sdc.hiframes.pd_timestamp_ext +import sdc.hiframes.boxing +import sdc.config +import sdc.timsort +from 
sdc.decorators import jit -if hpat.config._has_xenon: - from hpat.io.xenon_ext import read_xenon, xe_connect, xe_open, xe_close +if sdc.config._has_xenon: + from sdc.io.xenon_ext import read_xenon, xe_connect, xe_open, xe_close multithread_mode = False @@ -64,7 +64,7 @@ del get_versions -if not hpat.config.config_pipeline_hpat_default: +if not sdc.config.config_pipeline_hpat_default: """ Overload Numba functions to allow call SDC pass in Numba compiler pipeline Functions are: @@ -74,11 +74,11 @@ TODO: Needs to detect 'import Pandas' and align initialization according to it """ - hpat.config.numba_typed_passes_annotatetypes_orig = numba.typed_passes.AnnotateTypes.run_pass - numba.typed_passes.AnnotateTypes.run_pass = hpat.datatypes.hpat_pandas_dataframe_pass.sdc_dataframepassimpl_overload + sdc.config.numba_typed_passes_annotatetypes_orig = numba.typed_passes.AnnotateTypes.run_pass + numba.typed_passes.AnnotateTypes.run_pass = sdc.datatypes.hpat_pandas_dataframe_pass.sdc_dataframepassimpl_overload - hpat.config.numba_untyped_passes_inlineclosurelikes_orig = numba.untyped_passes.InlineClosureLikes.run_pass - numba.untyped_passes.InlineClosureLikes.run_pass = hpat.datatypes.hpat_pandas_dataframe_pass.sdc_hiframespassimpl_overload + sdc.config.numba_untyped_passes_inlineclosurelikes_orig = numba.untyped_passes.InlineClosureLikes.run_pass + numba.untyped_passes.InlineClosureLikes.run_pass = sdc.datatypes.hpat_pandas_dataframe_pass.sdc_hiframespassimpl_overload def _init_extension(): '''Register Pandas classes and functions with Numba. 
diff --git a/hpat/_cv.cpp b/sdc/_cv.cpp similarity index 100% rename from hpat/_cv.cpp rename to sdc/_cv.cpp diff --git a/hpat/_daal.cpp b/sdc/_daal.cpp similarity index 100% rename from hpat/_daal.cpp rename to sdc/_daal.cpp diff --git a/hpat/_datetime_ext.cpp b/sdc/_datetime_ext.cpp similarity index 100% rename from hpat/_datetime_ext.cpp rename to sdc/_datetime_ext.cpp diff --git a/hpat/_datetime_ext.h b/sdc/_datetime_ext.h similarity index 100% rename from hpat/_datetime_ext.h rename to sdc/_datetime_ext.h diff --git a/hpat/_dict_ext.cpp b/sdc/_dict_ext.cpp similarity index 100% rename from hpat/_dict_ext.cpp rename to sdc/_dict_ext.cpp diff --git a/hpat/_distributed.cpp b/sdc/_distributed.cpp similarity index 100% rename from hpat/_distributed.cpp rename to sdc/_distributed.cpp diff --git a/hpat/_distributed.h b/sdc/_distributed.h similarity index 100% rename from hpat/_distributed.h rename to sdc/_distributed.h diff --git a/hpat/_hiframes.cpp b/sdc/_hiframes.cpp similarity index 100% rename from hpat/_hiframes.cpp rename to sdc/_hiframes.cpp diff --git a/hpat/_hpat_common.h b/sdc/_hpat_common.h similarity index 100% rename from hpat/_hpat_common.h rename to sdc/_hpat_common.h diff --git a/hpat/_hpat_sort.h b/sdc/_hpat_sort.h similarity index 100% rename from hpat/_hpat_sort.h rename to sdc/_hpat_sort.h diff --git a/hpat/_import_py.h b/sdc/_import_py.h similarity index 100% rename from hpat/_import_py.h rename to sdc/_import_py.h diff --git a/hpat/_meminfo.h b/sdc/_meminfo.h similarity index 100% rename from hpat/_meminfo.h rename to sdc/_meminfo.h diff --git a/hpat/_ros.cpp b/sdc/_ros.cpp similarity index 100% rename from hpat/_ros.cpp rename to sdc/_ros.cpp diff --git a/hpat/_set_ext.cpp b/sdc/_set_ext.cpp similarity index 100% rename from hpat/_set_ext.cpp rename to sdc/_set_ext.cpp diff --git a/hpat/_str_decode.cpp b/sdc/_str_decode.cpp similarity index 100% rename from hpat/_str_decode.cpp rename to sdc/_str_decode.cpp diff --git a/hpat/_str_ext.cpp 
b/sdc/_str_ext.cpp similarity index 100% rename from hpat/_str_ext.cpp rename to sdc/_str_ext.cpp diff --git a/hpat/_version.py b/sdc/_version.py similarity index 99% rename from hpat/_version.py rename to sdc/_version.py index 780cbfe94..f21271ad7 100644 --- a/hpat/_version.py +++ b/sdc/_version.py @@ -69,8 +69,8 @@ def get_config(): cfg.VCS = "git" cfg.style = "pep440" cfg.tag_prefix = "" - cfg.parentdir_prefix = "hpat-" - cfg.versionfile_source = "hpat/_version.py" + cfg.parentdir_prefix = "sdc-" + cfg.versionfile_source = "sdc/_version.py" cfg.verbose = False return cfg diff --git a/hpat/compiler.py b/sdc/compiler.py similarity index 92% rename from hpat/compiler.py rename to sdc/compiler.py index 2cb771126..748325554 100644 --- a/hpat/compiler.py +++ b/sdc/compiler.py @@ -27,13 +27,13 @@ # from .pio import PIO from llvmlite import binding -import hpat -import hpat.hiframes -import hpat.hiframes.hiframes_untyped -import hpat.hiframes.hiframes_typed -from hpat.hiframes.hiframes_untyped import HiFramesPass -from hpat.hiframes.hiframes_typed import HiFramesTypedPass -from hpat.hiframes.dataframe_pass import DataFramePass +import sdc +import sdc.hiframes +import sdc.hiframes.hiframes_untyped +import sdc.hiframes.hiframes_typed +from sdc.hiframes.hiframes_untyped import HiFramesPass +from sdc.hiframes.hiframes_typed import HiFramesTypedPass +from sdc.hiframes.dataframe_pass import DataFramePass import numba import numba.compiler from numba.compiler import DefaultPassBuilder @@ -43,11 +43,11 @@ from numba.inline_closurecall import inline_closure_call, InlineClosureCallPass from numba.typed_passes import (NopythonTypeInference, AnnotateTypes, ParforPass, IRLegalization) from numba.untyped_passes import (DeadBranchPrune, InlineInlinables, InlineClosureLikes) -from hpat import config -from hpat.distributed import DistributedPass -import hpat.io +from sdc import config +from sdc.distributed import DistributedPass +import sdc.io if config._has_h5py: - from hpat.io import 
pio + from sdc.io import pio from numba.compiler_machinery import FunctionPass, register_pass @@ -124,7 +124,7 @@ def replace_pass(pm, pass_cls, location): @register_pass(mutates_CFG=True, analysis_only=False) class InlinePass(FunctionPass): - _name = "hpat_inline_pass" + _name = "sdc_inline_pass" def __init__(self): pass @@ -136,7 +136,7 @@ def run_pass(self, state): @register_pass(mutates_CFG=True, analysis_only=False) class PostprocessorPass(FunctionPass): - _name = "hpat_postprocessor_pass" + _name = "sdc_postprocessor_pass" def __init__(self): pass @@ -152,7 +152,7 @@ class SDCPipeline(numba.compiler.CompilerBase): """ def define_pipelines(self): - name = 'hpat' + name = 'sdc' pm = DefaultPassBuilder.define_nopython_pipeline(self.state) add_pass_before(pm, InlinePass, InlineClosureLikes) @@ -168,7 +168,7 @@ def define_pipelines(self): @register_pass(mutates_CFG=True, analysis_only=False) class ParforSeqPass(FunctionPass): - _name = "hpat_parfor_seq_pass" + _name = "sdc_parfor_seq_pass" def __init__(self): pass @@ -185,7 +185,7 @@ class SDCPipelineSeq(SDCPipeline): """ def define_pipelines(self): - name = 'hpat_seq' + name = 'sdc_seq' pm = DefaultPassBuilder.define_nopython_pipeline(self.state) add_pass_before(pm, InlinePass, InlineClosureLikes) diff --git a/hpat/config.py b/sdc/config.py similarity index 98% rename from hpat/config.py rename to sdc/config.py index 0edf80e14..fd3f154d2 100644 --- a/hpat/config.py +++ b/sdc/config.py @@ -61,7 +61,7 @@ _has_opencv = False else: _has_opencv = True - import hpat.cv_ext + import sdc.cv_ext try: from . 
import hxe_ext @@ -69,7 +69,7 @@ _has_xenon = False else: _has_xenon = True - import hpat.io.xenon_ext + import sdc.io.xenon_ext config_transport_mpi_default = distutils_util.strtobool(os.getenv('SDC_CONFIG_MPI', 'True')) ''' diff --git a/hpat/cv_ext.py b/sdc/cv_ext.py similarity index 99% rename from hpat/cv_ext.py rename to sdc/cv_ext.py index caf597d04..694e61584 100644 --- a/hpat/cv_ext.py +++ b/sdc/cv_ext.py @@ -26,12 +26,12 @@ import numba -import hpat +import sdc from numba import types from numba.typing.templates import infer_global, AbstractTemplate, infer, signature from numba.extending import lower_builtin, overload, intrinsic from numba import cgutils -from hpat.str_ext import string_type +from sdc.str_ext import string_type from numba.targets.imputils import impl_ret_new_ref, impl_ret_borrowed from numba.targets.arrayobj import _empty_nd_impl diff --git a/hpat/datatypes/__init__.py b/sdc/datatypes/__init__.py similarity index 90% rename from hpat/datatypes/__init__.py rename to sdc/datatypes/__init__.py index 3db787772..e2ce9ec3c 100644 --- a/hpat/datatypes/__init__.py +++ b/sdc/datatypes/__init__.py @@ -25,6 +25,6 @@ # ***************************************************************************** -import hpat.datatypes.hpat_pandas_dataframe_pass -import hpat.datatypes.hpat_pandas_seriesgroupby_functions -import hpat.datatypes.hpat_pandas_stringmethods_functions +import sdc.datatypes.hpat_pandas_dataframe_pass +import sdc.datatypes.hpat_pandas_seriesgroupby_functions +import sdc.datatypes.hpat_pandas_stringmethods_functions diff --git a/hpat/datatypes/common_functions.py b/sdc/datatypes/common_functions.py similarity index 94% rename from hpat/datatypes/common_functions.py rename to sdc/datatypes/common_functions.py index 3b0fb5fdc..b5745e8d1 100644 --- a/hpat/datatypes/common_functions.py +++ b/sdc/datatypes/common_functions.py @@ -36,8 +36,8 @@ from numba.extending import overload from numba import numpy_support -import hpat -from hpat.str_arr_ext 
import (string_array_type, num_total_chars, append_string_array_to) +import sdc +from sdc.str_arr_ext import (string_array_type, num_total_chars, append_string_array_to) def has_literal_value(var, value): @@ -106,7 +106,7 @@ def _append_list_numeric_impl(A, B): def _append_single_string_array_impl(A, B): total_size = len(A) + len(B) total_chars = num_total_chars(A) + num_total_chars(B) - new_data = hpat.str_arr_ext.pre_alloc_string_array(total_size, total_chars) + new_data = sdc.str_arr_ext.pre_alloc_string_array(total_size, total_chars) pos = 0 pos += append_string_array_to(new_data, pos, A) @@ -121,7 +121,7 @@ def _append_list_string_array_impl(A, B): total_size = numpy.array([len(arr) for arr in array_list]).sum() total_chars = numpy.array([num_total_chars(arr) for arr in array_list]).sum() - new_data = hpat.str_arr_ext.pre_alloc_string_array(total_size, total_chars) + new_data = sdc.str_arr_ext.pre_alloc_string_array(total_size, total_chars) pos = 0 pos += append_string_array_to(new_data, pos, A) diff --git a/hpat/datatypes/hpat_pandas_dataframe_functions.py b/sdc/datatypes/hpat_pandas_dataframe_functions.py similarity index 95% rename from hpat/datatypes/hpat_pandas_dataframe_functions.py rename to sdc/datatypes/hpat_pandas_dataframe_functions.py index d75f9f1cb..b17ceb47c 100644 --- a/hpat/datatypes/hpat_pandas_dataframe_functions.py +++ b/sdc/datatypes/hpat_pandas_dataframe_functions.py @@ -36,7 +36,7 @@ from numba.extending import (overload, overload_method, overload_attribute) from numba.errors import TypingError -from hpat.datatypes.hpat_pandas_dataframe_types import DataFrameType +from sdc.datatypes.hpat_pandas_dataframe_types import DataFrameType @overload_method(DataFrameType, 'count') @@ -46,7 +46,7 @@ def sdc_pandas_dataframe_count(self, axis=0, level=None, numeric_only=False): .. 
only:: developer - Test: python -m hpat.runtests hpat.tests.test_dataframe.TestDataFrame.test_count + Test: python -m sdc.runtests sdc.tests.test_dataframe.TestDataFrame.test_count Parameters ----------- diff --git a/hpat/datatypes/hpat_pandas_dataframe_pass.py b/sdc/datatypes/hpat_pandas_dataframe_pass.py similarity index 79% rename from hpat/datatypes/hpat_pandas_dataframe_pass.py rename to sdc/datatypes/hpat_pandas_dataframe_pass.py index 670a171e4..f98e1bd8d 100644 --- a/hpat/datatypes/hpat_pandas_dataframe_pass.py +++ b/sdc/datatypes/hpat_pandas_dataframe_pass.py @@ -28,7 +28,7 @@ | Procedures are required for SDC DataFrameType handling in Numba ''' -import hpat +import sdc def sdc_dataframepassimpl_overload(*args, **kwargs): @@ -46,20 +46,20 @@ def sdc_dataframepassimpl_overload(*args, **kwargs): no more needs Numba IR transformations via DataFramePass """ - if hpat.config.numba_typed_passes_annotatetypes_orig is None: + if sdc.config.numba_typed_passes_annotatetypes_orig is None: """ Unexpected usage of this function """ return False - status_numba_pass = hpat.config.numba_typed_passes_annotatetypes_orig(*args, **kwargs) + status_numba_pass = sdc.config.numba_typed_passes_annotatetypes_orig(*args, **kwargs) numba_state_var = args[1] - status_dataframe_pass = hpat.hiframes.dataframe_pass.DataFramePassImpl(numba_state_var).run_pass() - status_postprocess_pass = hpat.compiler.PostprocessorPass().run_pass(numba_state_var) - status_dataframe_typed_pass = hpat.hiframes.hiframes_typed.HiFramesTypedPassImpl(numba_state_var).run_pass() + status_dataframe_pass = sdc.hiframes.dataframe_pass.DataFramePassImpl(numba_state_var).run_pass() + status_postprocess_pass = sdc.compiler.PostprocessorPass().run_pass(numba_state_var) + status_dataframe_typed_pass = sdc.hiframes.hiframes_typed.HiFramesTypedPassImpl(numba_state_var).run_pass() is_ir_mutated = status_numba_pass or status_dataframe_pass or status_postprocess_pass or status_dataframe_typed_pass @@ -79,7 +79,7 @@ def 
sdc_hiframespassimpl_overload(*args, **kwargs): no more needs Numba IR transformations via DataFramePass """ - if hpat.config.numba_untyped_passes_inlineclosurelikes_orig is None: + if sdc.config.numba_untyped_passes_inlineclosurelikes_orig is None: """ Unexpected usage of this function """ @@ -88,10 +88,10 @@ def sdc_hiframespassimpl_overload(*args, **kwargs): numba_state_var = args[1] - status_inlinepass_pass = hpat.compiler.InlinePass().run_pass(numba_state_var) - status_hiframespass_pass = hpat.hiframes.hiframes_untyped.HiFramesPassImpl(numba_state_var).run_pass() + status_inlinepass_pass = sdc.compiler.InlinePass().run_pass(numba_state_var) + status_hiframespass_pass = sdc.hiframes.hiframes_untyped.HiFramesPassImpl(numba_state_var).run_pass() - status_numba_pass = hpat.config.numba_untyped_passes_inlineclosurelikes_orig(*args, **kwargs) + status_numba_pass = sdc.config.numba_untyped_passes_inlineclosurelikes_orig(*args, **kwargs) is_ir_mutated = status_inlinepass_pass or status_hiframespass_pass or status_numba_pass diff --git a/hpat/datatypes/hpat_pandas_dataframe_types.py b/sdc/datatypes/hpat_pandas_dataframe_types.py similarity index 96% rename from hpat/datatypes/hpat_pandas_dataframe_types.py rename to sdc/datatypes/hpat_pandas_dataframe_types.py index dbfdd533e..ff8a16766 100644 --- a/hpat/datatypes/hpat_pandas_dataframe_types.py +++ b/sdc/datatypes/hpat_pandas_dataframe_types.py @@ -40,7 +40,7 @@ from numba.datamodel import register_default, StructModel from numba.typing.templates import signature, infer_global, AbstractTemplate -from hpat.config import config_pipeline_hpat_default +from sdc.config import config_pipeline_hpat_default class DataFrameTypeIterator(types.SimpleIteratorType): @@ -95,7 +95,7 @@ class DataFrameType(types.IterableType): expected columns if passed as a list. This data is interpreted as columns if passed as a dictinary only. 
- Test: python -m hpat.runtests hpat.tests.test_dataframe.TestDataFrame.test_create + Test: python -m sdc.runtests sdc.tests.test_dataframe.TestDataFrame.test_create """ def __init__(self, data=None): @@ -117,7 +117,7 @@ class DataFrameTypeModel(StructModel): Model for DataFrameType type All members must be the same as main type for this model - Test: python -m hpat.runtests hpat.tests.test_dataframe.TestDataFrame.test_create_numeric_column + Test: python -m sdc.runtests sdc.tests.test_dataframe.TestDataFrame.test_create_numeric_column """ def __init__(self, dmm, fe_type): diff --git a/hpat/datatypes/hpat_pandas_series_functions.py b/sdc/datatypes/hpat_pandas_series_functions.py similarity index 89% rename from hpat/datatypes/hpat_pandas_series_functions.py rename to sdc/datatypes/hpat_pandas_series_functions.py index 31693dfec..6f2e85171 100644 --- a/hpat/datatypes/hpat_pandas_series_functions.py +++ b/sdc/datatypes/hpat_pandas_series_functions.py @@ -38,12 +38,12 @@ from numba.extending import overload, overload_method, overload_attribute from numba import types -import hpat -import hpat.datatypes.common_functions as common_functions -from hpat.datatypes.hpat_pandas_stringmethods_types import StringMethodsType -from hpat.hiframes.pd_series_ext import SeriesType -from hpat.str_arr_ext import (StringArrayType, cp_str_list_to_array, num_total_chars) -from hpat.utils import to_array +import sdc +import sdc.datatypes.common_functions as common_functions +from sdc.datatypes.hpat_pandas_stringmethods_types import StringMethodsType +from sdc.hiframes.pd_series_ext import SeriesType +from sdc.str_arr_ext import (StringArrayType, cp_str_list_to_array, num_total_chars) +from sdc.utils import to_array class TypeChecker: """ @@ -100,7 +100,7 @@ def hpat_pandas_series_getitem(self, idx): Pandas Series operator :attr:`pandas.Series.get` implementation **Algorithm**: result = series[idx] - **Test**: python -m hpat.runtests 
hpat.tests.test_series.TestSeries.test_static_getitem_series1 + **Test**: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_static_getitem_series1 Parameters ---------- @@ -123,7 +123,7 @@ def hpat_pandas_series_getitem(self, idx): if isinstance(idx, types.Integer): def hpat_pandas_series_getitem_idx_integer_impl(self, idx): """ - **Test**: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_iloc1 + **Test**: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_iloc1 """ result = self._data[idx] @@ -134,7 +134,7 @@ def hpat_pandas_series_getitem_idx_integer_impl(self, idx): if isinstance(idx, types.SliceType): def hpat_pandas_series_getitem_idx_slice_impl(self, idx): """ - **Test**: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_iloc2 + **Test**: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_iloc2 """ result = pandas.Series(self._data[idx]) @@ -145,7 +145,7 @@ def hpat_pandas_series_getitem_idx_slice_impl(self, idx): if isinstance(idx, SeriesType): def hpat_pandas_series_getitem_idx_series_impl(self, idx): """ - **Test**: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_setitem_series_bool2 + **Test**: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_setitem_series_bool2 """ super_index = idx._data result = self._data[super_index] @@ -165,7 +165,7 @@ def hpat_pandas_series_iloc(self): Pandas Series operators :attr:`pandas.Series.at`, :attr:`pandas.Series.iat`, :attr:`pandas.Series.iloc`, :attr:`pandas.Series.loc` implementation. .. only:: developer - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_iloc2 + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_iloc2 Parameters ---------- @@ -195,7 +195,7 @@ def hpat_pandas_series_nsmallest(self, n=5, keep='first'): Pandas Series method :meth:`pandas.Series.nsmallest` implementation. .. 
only:: developer - Test: python -m hpat.runtests -k hpat.tests.test_series.TestSeries.test_series_nsmallest* + Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_nsmallest* Parameters ---------- @@ -245,7 +245,7 @@ def hpat_pandas_series_nlargest(self, n=5, keep='first'): Pandas Series method :meth:`pandas.Series.nlargest` implementation. .. only:: developer - Test: python -m hpat.runtests -k hpat.tests.test_series.TestSeries.test_series_nlargest* + Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_nlargest* Parameters ---------- @@ -296,7 +296,7 @@ def hpat_pandas_series_shape(self): """ Pandas Series attribute :attr:`pandas.Series.shape` implementation **Algorithm**: result = series.shape - **Test**: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_shape1 + **Test**: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_shape1 Parameters ---------- series: :obj:`pandas.Series` @@ -324,10 +324,10 @@ def hpat_pandas_series_std(self, axis=None, skipna=None, level=None, ddof=1, num Pandas Series method :meth:`pandas.Series.std` implementation. .. 
only:: developer - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_std - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_std_unboxing - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_std_str - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_std_unsupported_params + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_std + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_std_unboxing + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_std_str + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_std_unsupported_params Parameters ---------- @@ -395,7 +395,7 @@ def hpat_pandas_series_values(self): Where: series: pandas.series result: pandas.series as ndarray - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_values + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_values """ _func_name = 'Attribute values.' @@ -415,7 +415,7 @@ def hpat_pandas_series_value_counts(self, normalize=False, sort=True, ascending= Pandas Series method :meth:`pandas.Series.value_counts` implementation. .. only:: developer - Test: python -m hpat.runtests -k hpat.tests.test_series.TestSeries.test_series_value_counts* + Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_value_counts* Parameters ----------- @@ -527,10 +527,10 @@ def hpat_pandas_series_var(self, axis=None, skipna=None, level=None, ddof=1, num Pandas Series method :meth:`pandas.Series.var` implementation. .. 
only:: developer - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_var - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_var_unboxing - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_var_str - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_var_unsupported_params + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_var + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_var_unboxing + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_var_str + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_var_unsupported_params Parameters ---------- @@ -606,8 +606,8 @@ def hpat_pandas_series_index(self): """ Pandas Series attribute :attr:`pandas.Series.index` implementation **Algorithm**: result = series.index - **Test**: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_index1 - python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_index2 + **Test**: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_index1 + python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_index2 Parameters ---------- series: :obj:`pandas.Series` @@ -642,7 +642,7 @@ def hpat_pandas_series_size(self): .. only:: developer - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_size + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_size Parameters ---------- @@ -673,7 +673,7 @@ def hpat_pandas_series_str(self): .. only:: developer - Test: python -m hpat.runtests hpat.tests.test_hiframes.TestHiFrames.test_str_get + Test: python -m sdc.runtests sdc.tests.test_hiframes.TestHiFrames.test_str_get Parameters ---------- @@ -708,7 +708,7 @@ def hpat_pandas_series_ndim(self): .. 
only:: developer - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_getattr_ndim + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_getattr_ndim Parameters ---------- @@ -739,7 +739,7 @@ def hpat_pandas_series_T(self): .. only:: developer - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_getattr_T + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_getattr_T Parameters ---------- @@ -770,7 +770,7 @@ def hpat_pandas_series_len(self): .. only:: developer - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_len + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_len Parameters ---------- @@ -799,7 +799,7 @@ def hpat_pandas_series_astype(self, dtype, copy=True, errors='raise'): Pandas Series method :meth:`pandas.Series.astype` implementation. Cast a pandas object to a specified dtype dtype .. only:: developer - Test: python -m hpat.runtests -k hpat.tests.test_series.TestSeries.test_series_astype* + Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_astype* Parameters ----------- @@ -842,7 +842,7 @@ def hpat_pandas_series_astype_to_str_impl(self, dtype, copy=True, errors='raise' item = self._data[i] num_chars += len(str(item)) # TODO: check NA - data = hpat.str_arr_ext.pre_alloc_string_array(arr_len, num_chars) + data = sdc.str_arr_ext.pre_alloc_string_array(arr_len, num_chars) for i in numba.parfor.internal_prange(arr_len): item = self._data[i] data[i] = str(item) # TODO: check NA @@ -902,12 +902,12 @@ def hpat_pandas_series_shift(self, periods=1, freq=None, axis=0, fill_value=None Pandas Series method :meth:`pandas.Series.shift` implementation. .. 
only:: developer - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_shift - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_shift_unboxing - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_shift_full - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_shift_str - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_shift_fill_str - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_shift_unsupported_params + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_shift + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_shift_unboxing + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_shift_full + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_shift_str + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_shift_fill_str + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_shift_unsupported_params Parameters ---------- @@ -974,7 +974,7 @@ def hpat_pandas_series_isin(self, values): """ Pandas Series method :meth:`pandas.Series.isin` implementation. .. only:: developer - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_isin_list1 + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_isin_list1 Parameters ----------- values : :obj:`list` or :obj:`set` object @@ -1010,7 +1010,7 @@ def hpat_pandas_series_append(self, to_append, ignore_index=False, verify_integr .. only:: developer - Test: python -m hpat.runtests -k hpat.tests.test_series.TestSeries.test_series_append* + Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_append* Parameters ----------- @@ -1099,9 +1099,9 @@ def hpat_pandas_series_copy(self, deep=True): .. 
only:: developer - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_copy_str1 - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_copy_int1 - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_copy_deep + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_copy_str1 + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_copy_int1 + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_copy_deep Parameters ----------- @@ -1146,9 +1146,9 @@ def hpat_pandas_series_corr(self, other, method='pearson', min_periods=None): .. only:: developer - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_corr - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_corr_unsupported_dtype - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_corr_unsupported_period + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_corr + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_corr_unsupported_dtype + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_corr_unsupported_period Parameters ---------- @@ -1211,7 +1211,7 @@ def hpat_pandas_series_head(self, n=5): Pandas Series method :meth:`pandas.Series.head` implementation. .. only:: developer - Test: python -m -k hpat.runtests hpat.tests.test_series.TestSeries.test_series_head* + Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_head* Parameters ----------- @@ -1257,7 +1257,7 @@ def hpat_pandas_series_groupby( """ Pandas Series method :meth:`pandas.Series.groupby` implementation. .. 
only:: developer - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_groupby_count + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_groupby_count Parameters ----------- self: :class:`pandas.Series` @@ -1322,9 +1322,9 @@ def hpat_pandas_series_isna(self): .. only:: developer - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_isna1 - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_str_isna1 - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_isnull1 + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_isna1 + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_str_isna1 + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_isnull1 Parameters ----------- @@ -1374,7 +1374,7 @@ def hpat_pandas_series_notna(self): .. only:: developer - Test: python -m -k hpat.runtests hpat.tests.test_series.TestSeries.test_series_notna* + Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_notna* Parameters ----------- @@ -1412,7 +1412,7 @@ def hpat_pandas_series_ne(self, other, level=None, fill_value=None, axis=0): Pandas Series method :meth:`pandas.Series.ne` implementation. .. 
only:: developer - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_op8 + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op8 Parameters ---------- @@ -1445,7 +1445,7 @@ def hpat_pandas_series_ne(self, other, level=None, fill_value=None, axis=0): if isinstance(other, SeriesType): def hpat_pandas_series_ne_impl(self, other): """ - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_op8 + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op8 """ return pandas.Series(self._data != other._data) @@ -1455,8 +1455,8 @@ def hpat_pandas_series_ne_impl(self, other): if isinstance(other, types.Integer) or isinstance(other, types.Float): def hpat_pandas_series_ne_impl(self, other): """ - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_op8_integer_scalar - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_op8_float_scalar + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op8_integer_scalar + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op8_float_scalar """ return pandas.Series(self._data != other) @@ -1474,7 +1474,7 @@ def hpat_pandas_series_add(self, other, level=None, fill_value=None, axis=0): Pandas Series method :meth:`pandas.Series.add` implementation. .. 
only:: developer - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_op5 + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op5 Parameters ---------- @@ -1507,7 +1507,7 @@ def hpat_pandas_series_add(self, other, level=None, fill_value=None, axis=0): if isinstance(other, SeriesType): def hpat_pandas_series_add_impl(lhs, rhs): """ - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_op5 + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op5 """ return pandas.Series(lhs._data + rhs._data) @@ -1517,8 +1517,8 @@ def hpat_pandas_series_add_impl(lhs, rhs): if isinstance(other, types.Integer) or isinstance(other, types.Float): def hpat_pandas_series_add_number_impl(lhs, rhs): """ - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_op5_integer_scalar - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_op5_float_scalar + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op5_integer_scalar + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op5_float_scalar """ return pandas.Series(lhs._data + rhs) @@ -1536,7 +1536,7 @@ def hpat_pandas_series_sub(self, other, level=None, fill_value=None, axis=0): Pandas Series method :meth:`pandas.Series.sub` implementation. .. 
only:: developer - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_op5 + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op5 Parameters ---------- @@ -1569,7 +1569,7 @@ def hpat_pandas_series_sub(self, other, level=None, fill_value=None, axis=0): if isinstance(other, SeriesType): def hpat_pandas_series_sub_impl(self, other): """ - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_op5 + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op5 """ return pandas.Series(self._data - other._data) @@ -1579,8 +1579,8 @@ def hpat_pandas_series_sub_impl(self, other): if isinstance(other, types.Integer) or isinstance(other, types.Float): def hpat_pandas_series_sub_number_impl(self, other): """ - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_op5_integer_scalar - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_op5_float_scalar + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op5_integer_scalar + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op5_float_scalar """ return pandas.Series(self._data - other) @@ -1605,8 +1605,8 @@ def hpat_pandas_series_sum( .. 
only:: developer Tests: - python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_sum1 - # python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_sum2 + python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_sum1 + # python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_sum2 Parameters ---------- @@ -1664,7 +1664,7 @@ def hpat_pandas_series_sum_impl( min_count=0, ): """ - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_sum1 + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_sum1 """ if skipna is None: skipna = True @@ -1680,12 +1680,12 @@ def hpat_pandas_series_take(self, indices, axis=0, is_copy=False): """ Pandas Series method :meth:`pandas.Series.take` implementation. .. only:: developer - Tests: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_take_index_default - python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_take_index_default_unboxing - python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_take_index_int - python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_take_index_int_unboxing - python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_take_index_str - python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_take_index_str_unboxing + Tests: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_take_index_default + python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_take_index_default_unboxing + python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_take_index_int + python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_take_index_int_unboxing + python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_take_index_str + python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_take_index_str_unboxing Parameters ---------- self: :obj:`pandas.Series` @@ -1740,10 +1740,10 @@ 
def hpat_pandas_series_idxmax(self, axis=None, skipna=True, *args): """ Pandas Series method :meth:`pandas.Series.idxmax` implementation. .. only:: developer - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_idxmax1 - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_idxmax_str_idx - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_idxmax_noidx - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_idxmax_idx + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_idxmax1 + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_idxmax_str_idx + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_idxmax_noidx + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_idxmax_idx Parameters ----------- @@ -1799,7 +1799,7 @@ def hpat_pandas_series_mul(self, other, level=None, fill_value=None, axis=0): Pandas Series method :meth:`pandas.Series.mul` implementation. .. 
only:: developer - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_op5 + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op5 Parameters ---------- @@ -1832,7 +1832,7 @@ def hpat_pandas_series_mul(self, other, level=None, fill_value=None, axis=0): if isinstance(other, SeriesType): def hpat_pandas_series_mul_impl(self, other): """ - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_op5 + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op5 """ return pandas.Series(self._data * other._data) @@ -1842,8 +1842,8 @@ def hpat_pandas_series_mul_impl(self, other): if isinstance(other, types.Integer) or isinstance(other, types.Float): def hpat_pandas_series_mul_number_impl(self, other): """ - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_op5_integer_scalar - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_op5_float_scalar + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op5_integer_scalar + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op5_float_scalar """ return pandas.Series(self._data * other) @@ -1860,7 +1860,7 @@ def hpat_pandas_series_div(self, other, level=None, fill_value=None, axis=0): Pandas Series method :meth:`pandas.Series.div` and :meth:`pandas.Series.truediv` implementation. .. 
only:: developer - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_op5 + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op5 Parameters ---------- @@ -1893,7 +1893,7 @@ def hpat_pandas_series_div(self, other, level=None, fill_value=None, axis=0): if isinstance(other, SeriesType): def hpat_pandas_series_div_impl(self, other): """ - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_op5 + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op5 """ return pandas.Series(self._data / other._data) @@ -1903,8 +1903,8 @@ def hpat_pandas_series_div_impl(self, other): if isinstance(other, types.Integer) or isinstance(other, types.Float): def hpat_pandas_series_div_number_impl(self, other): """ - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_op5_integer_scalar - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_op5_float_scalar + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op5_integer_scalar + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op5_float_scalar """ return pandas.Series(self._data / other) @@ -1920,7 +1920,7 @@ def hpat_pandas_series_floordiv(self, other, level=None, fill_value=None, axis=0 Pandas Series method :meth:`pandas.Series.floordiv` implementation. .. 
only:: developer - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_op5 + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op5 Parameters ---------- @@ -1953,7 +1953,7 @@ def hpat_pandas_series_floordiv(self, other, level=None, fill_value=None, axis=0 if isinstance(other, SeriesType): def hpat_pandas_series_floordiv_impl(self, other): """ - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_op5 + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op5 """ return pandas.Series(self._data // other._data) @@ -1963,8 +1963,8 @@ def hpat_pandas_series_floordiv_impl(self, other): if isinstance(other, types.Integer) or isinstance(other, types.Float): def hpat_pandas_series_floordiv_number_impl(self, other): """ - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_op5_integer_scalar - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_op5_float_scalar + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op5_integer_scalar + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op5_float_scalar """ return pandas.Series(self._data // other) @@ -1979,7 +1979,7 @@ def hpat_pandas_series_pow(self, other, level=None, fill_value=None, axis=0): """ Pandas Series method :meth:`pandas.Series.pow` implementation. .. 
only:: developer - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_op5 + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op5 Parameters ---------- self: :class:`pandas.Series` @@ -2011,7 +2011,7 @@ def hpat_pandas_series_pow(self, other, level=None, fill_value=None, axis=0): if isinstance(other, SeriesType): def hpat_pandas_series_pow_impl(self, other): """ - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_op5 + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op5 """ return pandas.Series(self._data ** other._data) @@ -2021,8 +2021,8 @@ def hpat_pandas_series_pow_impl(self, other): if isinstance(other, types.Integer) or isinstance(other, types.Float): def hpat_pandas_series_pow_impl(self, other): """ - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_op5_integer_scalar - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_op5_float_scalar + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op5_integer_scalar + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op5_float_scalar """ return pandas.Series(self._data ** other) @@ -2041,7 +2041,7 @@ def hpat_pandas_series_prod(self, axis=None, skipna=True, level=None, numeric_on .. only:: developer - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_prod + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_prod Parameters ----------- @@ -2104,8 +2104,8 @@ def hpat_pandas_series_quantile(self, q=0.5, interpolation='linear'): """ Pandas Series method :meth:`pandas.Series.quantile` implementation. .. 
only:: developer - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_quantile - python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_quantile_q_vector + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_quantile + python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_quantile_q_vector Parameters ----------- q : :obj: float or array-like object, default 0.5 @@ -2139,8 +2139,8 @@ def hpat_pandas_series_min(self, axis=None, skipna=True, level=None, numeric_onl """ Pandas Series method :meth:`pandas.Series.min` implementation. .. only:: developer - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_min - python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_min_param + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_min + python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_min_param Parameters ----------- axis: @@ -2192,8 +2192,8 @@ def hpat_pandas_series_max(self, axis=None, skipna=True, level=None, numeric_onl """ Pandas Series method :meth:`pandas.Series.max` implementation. .. only:: developer - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_max - python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_max_param + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_max + python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_max_param Parameters ----------- axis: @@ -2247,7 +2247,7 @@ def hpat_pandas_series_mean(self, axis=None, skipna=None, level=None, numeric_on .. 
only:: developer - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_mean + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_mean Parameters ----------- @@ -2308,7 +2308,7 @@ def hpat_pandas_series_mod(self, other, level=None, fill_value=None, axis=0): """ Pandas Series method :meth:`pandas.Series.mod` implementation. .. only:: developer - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_op5 + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op5 Parameters ---------- self: :class:`pandas.Series` @@ -2340,7 +2340,7 @@ def hpat_pandas_series_mod(self, other, level=None, fill_value=None, axis=0): if isinstance(other, SeriesType): def hpat_pandas_series_mod_impl(self, other): """ - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_op5 + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op5 """ return pandas.Series(self._data % other._data) @@ -2350,8 +2350,8 @@ def hpat_pandas_series_mod_impl(self, other): if isinstance(other, types.Integer) or isinstance(other, types.Float): def hpat_pandas_series_mod_impl(self, other): """ - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_op5_integer_scalar - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_op5_float_scalar + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op5_integer_scalar + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op5_float_scalar """ return pandas.Series(self._data % other) @@ -2368,7 +2368,7 @@ def hpat_pandas_series_eq(self, other, level=None, fill_value=None, axis=0): """ Pandas Series method :meth:`pandas.Series.eq` implementation. .. 
only:: developer - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_op8 + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op8 Parameters ---------- self: :class:`pandas.Series` @@ -2400,7 +2400,7 @@ def hpat_pandas_series_eq(self, other, level=None, fill_value=None, axis=0): if isinstance(other, SeriesType): def hpat_pandas_series_eq_impl(self, other): """ - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_op8 + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op8 """ return pandas.Series(self._data == other._data) @@ -2410,8 +2410,8 @@ def hpat_pandas_series_eq_impl(self, other): if isinstance(other, types.Integer) or isinstance(other, types.Float): def hpat_pandas_series_eq_impl(self, other): """ - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_op8_integer_scalar - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_op8_float_scalar + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op8_integer_scalar + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op8_float_scalar """ return pandas.Series(self._data == other) @@ -2428,7 +2428,7 @@ def hpat_pandas_series_ge(self, other, level=None, fill_value=None, axis=0): """ Pandas Series method :meth:`pandas.Series.ge` implementation. .. 
only:: developer - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_op8 + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op8 Parameters ---------- self: :class:`pandas.Series` @@ -2460,7 +2460,7 @@ def hpat_pandas_series_ge(self, other, level=None, fill_value=None, axis=0): if isinstance(other, SeriesType): def hpat_pandas_series_ge_impl(self, other): """ - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_op8 + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op8 """ return pandas.Series(self._data >= other._data) @@ -2470,8 +2470,8 @@ def hpat_pandas_series_ge_impl(self, other): if isinstance(other, types.Integer) or isinstance(other, types.Float): def hpat_pandas_series_ge_impl(self, other): """ - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_op8_integer_scalar - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_op8_float_scalar + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op8_integer_scalar + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op8_float_scalar """ return pandas.Series(self._data >= other) @@ -2490,13 +2490,13 @@ def hpat_pandas_series_idxmin(self, axis=None, skipna=True, *args): .. 
only:: developer - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_idxmin1 - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_idxmin_str - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_idxmin_str_idx - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_idxmin_no - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_idxmin_int - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_idxmin_noidx - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_idxmin_idx + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_idxmin1 + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_idxmin_str + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_idxmin_str_idx + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_idxmin_no + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_idxmin_int + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_idxmin_noidx + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_idxmin_idx Parameters ----------- @@ -2552,7 +2552,7 @@ def hpat_pandas_series_lt(self, other, level=None, fill_value=None, axis=0): """ Pandas Series method :meth:`pandas.Series.lt` implementation. .. 
only:: developer - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_op8 + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op8 Parameters ---------- self: :class:`pandas.Series` @@ -2584,7 +2584,7 @@ def hpat_pandas_series_lt(self, other, level=None, fill_value=None, axis=0): if isinstance(other, SeriesType): def hpat_pandas_series_lt_impl(self, other): """ - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_op8 + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op8 """ return pandas.Series(self._data < other._data) @@ -2594,8 +2594,8 @@ def hpat_pandas_series_lt_impl(self, other): if isinstance(other, types.Integer) or isinstance(other, types.Float): def hpat_pandas_series_lt_impl(self, other): """ - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_op8_integer_scalar - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_op8_float_scalar + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op8_integer_scalar + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op8_float_scalar """ return pandas.Series(self._data < other) @@ -2612,7 +2612,7 @@ def hpat_pandas_series_gt(self, other, level=None, fill_value=None, axis=0): """ Pandas Series method :meth:`pandas.Series.gt` implementation. .. 
only:: developer - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_op8 + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op8 Parameters ---------- self: :class:`pandas.Series` @@ -2644,7 +2644,7 @@ def hpat_pandas_series_gt(self, other, level=None, fill_value=None, axis=0): if isinstance(other, SeriesType): def hpat_pandas_series_gt_impl(self, other): """ - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_op8 + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op8 """ return pandas.Series(self._data > other._data) @@ -2654,8 +2654,8 @@ def hpat_pandas_series_gt_impl(self, other): if isinstance(other, types.Integer) or isinstance(other, types.Float): def hpat_pandas_series_gt_impl(self, other): """ - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_op8_integer_scalar - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_op8_float_scalar + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op8_integer_scalar + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op8_float_scalar """ return pandas.Series(self._data > other) @@ -2672,7 +2672,7 @@ def hpat_pandas_series_le(self, other, level=None, fill_value=None, axis=0): """ Pandas Series method :meth:`pandas.Series.le` implementation. .. 
only:: developer - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_op8 + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op8 Parameters ---------- self: :class:`pandas.Series` @@ -2704,7 +2704,7 @@ def hpat_pandas_series_le(self, other, level=None, fill_value=None, axis=0): if isinstance(other, SeriesType): def hpat_pandas_series_le_impl(self, other): """ - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_op8 + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op8 """ return pandas.Series(self._data <= other._data) @@ -2714,8 +2714,8 @@ def hpat_pandas_series_le_impl(self, other): if isinstance(other, types.Integer) or isinstance(other, types.Float): def hpat_pandas_series_le_impl(self, other): """ - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_op8_integer_scalar - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_op8_float_scalar + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op8_integer_scalar + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_op8_float_scalar """ return pandas.Series(self._data <= other) @@ -2732,7 +2732,7 @@ def hpat_pandas_series_abs(self): """ Pandas Series method :meth:`pandas.Series.abs` implementation. .. only:: developer - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_abs1 + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_abs1 Parameters ----------- self: :obj:`pandas.Series` @@ -2766,7 +2766,7 @@ def hpat_pandas_series_unique(self): Pandas Series method :meth:`pandas.Series.unique` implementation. Note: Return values order is unspecified .. 
only:: developer - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_unique_sorted + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_unique_sorted Parameters ----------- self: :class:`pandas.Series` @@ -2789,7 +2789,7 @@ def hpat_pandas_series_unique_str_impl(self): Note: Can't use Numpy due to StringArrayType has no ravel() for noPython mode. Also, NotImplementedError: unicode_type cannot be represented as a Numpy dtype - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_unique_str + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_unique_str ''' str_set = set(self._data) @@ -2801,7 +2801,7 @@ def hpat_pandas_series_unique_impl(self): ''' Returns sorted unique elements of an array - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_unique + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_unique ''' return numpy.unique(self._data) @@ -2815,11 +2815,11 @@ def hpat_pandas_series_cumsum(self, axis=None, skipna=True, *args): Pandas Series method :meth:`pandas.Series.cumsum` implementation. .. 
only:: developer - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_cumsum - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_cumsum_unboxing - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_cumsum_full - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_cumsum_str - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_cumsum_unsupported_axis + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_cumsum + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_cumsum_unboxing + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_cumsum_full + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_cumsum_str + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_cumsum_unsupported_axis Parameters ---------- @@ -2872,7 +2872,7 @@ def hpat_pandas_series_nunique(self, dropna=True): Note: Unsupported mixed numeric and string data .. only:: developer - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_nunique + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_nunique Parameters ----------- self: :obj:`pandas.Series` @@ -2927,7 +2927,7 @@ def hpat_pandas_series_count(self, level=None): """ Pandas Series method :meth:`pandas.Series.count` implementation. .. 
only:: developer - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_count + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_count Parameters ----------- @@ -2962,7 +2962,7 @@ def hpat_pandas_series_count_impl(self, level=None): """ Return number of non-NA/null observations in the object Returns number of unique elements in the object - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_count + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_count """ data_no_nan = self._data[~numpy.isnan(self._data)] return len(data_no_nan) @@ -2977,9 +2977,9 @@ def hpat_pandas_series_median(self, axis=None, skipna=True, level=None, numeric_ .. only:: developer - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_median1 - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_median_skipna_default1 - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_median_skipna_false1 + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_median1 + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_median_skipna_default1 + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_median_skipna_false1 Parameters ----------- @@ -3045,7 +3045,7 @@ def hpat_pandas_series_argsort(self, axis=0, kind='quicksort', order=None): .. 
only:: developer - Test: python -m hpat.runtests -k hpat.tests.test_series.TestSeries.test_series_argsort* + Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_argsort* Parameters ----------- @@ -3097,7 +3097,7 @@ def hpat_pandas_series_argsort_idx_impl(self, axis=0, kind='quicksort', order=No sort = numpy.argsort(self._data) na = self.isna().sum() result = numpy.empty(len(self._data), dtype=numpy.int64) - na_data_arr = hpat.hiframes.api.get_nan_mask(self._data) + na_data_arr = sdc.hiframes.api.get_nan_mask(self._data) if kind == 'mergesort': sort_nona = numpy.argsort(self._data[~na_data_arr], kind='mergesort') else: @@ -3124,7 +3124,7 @@ def hpat_pandas_series_argsort_noidx_impl(self, axis=0, kind='quicksort', order= sort = numpy.argsort(self._data) na = self.isna().sum() result = numpy.empty(len(self._data), dtype=numpy.int64) - na_data_arr = hpat.hiframes.api.get_nan_mask(self._data) + na_data_arr = sdc.hiframes.api.get_nan_mask(self._data) if kind == 'mergesort': sort_nona = numpy.argsort(self._data[~na_data_arr], kind='mergesort') else: @@ -3150,7 +3150,7 @@ def hpat_pandas_series_sort_values(self, axis=0, ascending=True, inplace=False, .. only:: developer - Test: python -m hpat.runtests -k hpat.tests.test_series.TestSeries.test_series_sort_values* + Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_sort_values* Parameters ----------- @@ -3325,7 +3325,7 @@ def hpat_pandas_series_dropna(self, axis=0, inplace=False): Pandas Series method :meth:`pandas.Series.dropna` implementation. .. only:: developer - Tests: python -m hpat.runtests -k hpat.tests.test_series.TestSeries.test_series_dropna* + Tests: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_dropna* Parameters ---------- @@ -3355,7 +3355,7 @@ def hpat_pandas_series_dropna(self, axis=0, inplace=False): def hpat_pandas_series_dropna_impl(self, axis=0, inplace=False): # generate Series index if needed by using SeriesType.index (i.e. 
not self._index) - na_data_arr = hpat.hiframes.api.get_nan_mask(self._data) + na_data_arr = sdc.hiframes.api.get_nan_mask(self._data) data = self._data[~na_data_arr] index = self.index[~na_data_arr] return pandas.Series(data, index, self._name) @@ -3370,7 +3370,7 @@ def hpat_pandas_series_fillna(self, value=None, method=None, axis=None, inplace= .. only:: developer - Tests: python -m hpat.runtests -k hpat.tests.test_series.TestSeries.test_series_fillna* + Tests: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_fillna* Parameters ---------- @@ -3435,7 +3435,7 @@ def hpat_pandas_series_no_nan_fillna_impl(self, value=None, method=None, axis=No return hpat_pandas_series_no_nan_fillna_impl else: def hpat_pandas_series_fillna_impl(self, value=None, method=None, axis=None, inplace=False, limit=None, downcast=None): - na_data_arr = hpat.hiframes.api.get_nan_mask(self._data) + na_data_arr = sdc.hiframes.api.get_nan_mask(self._data) self._data[na_data_arr] = value return None @@ -3452,14 +3452,14 @@ def hpat_pandas_series_str_fillna_impl(self, value=None, method=None, axis=None, # get total chars in new array for i in numba.parfor.internal_prange(n): s = self._data[i] - if hpat.hiframes.api.isna(self._data, i): + if sdc.hiframes.api.isna(self._data, i): num_chars += len(value) else: num_chars += len(s) - filled_data = hpat.str_arr_ext.pre_alloc_string_array(n, num_chars) + filled_data = sdc.str_arr_ext.pre_alloc_string_array(n, num_chars) for i in numba.parfor.internal_prange(n): - if hpat.hiframes.api.isna(self._data, i): + if sdc.hiframes.api.isna(self._data, i): filled_data[i] = value else: filled_data[i] = self._data[i] @@ -3475,7 +3475,7 @@ def hpat_pandas_series_no_nan_fillna_impl(self, value=None, method=None, axis=No else: def hpat_pandas_series_fillna_impl(self, value=None, method=None, axis=None, inplace=False, limit=None, downcast=None): - na_data_arr = hpat.hiframes.api.get_nan_mask(self._data) + na_data_arr = 
sdc.hiframes.api.get_nan_mask(self._data) filled_data = numpy.copy(self._data) filled_data[na_data_arr] = value return pandas.Series(filled_data, self._index, self._name) @@ -3492,7 +3492,7 @@ def hpat_pandas_series_cov(self, other, min_periods=None): .. only:: developer - Test: python -m hpat.runtests -k hpat.tests.test_series.TestSeries.test_series_cov + Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_cov Parameters ---------- @@ -3555,7 +3555,7 @@ def hpat_pandas_series_pct_change(self, periods=1, fill_method='pad', limit=None .. only:: developer - Test: python -m hpat.runtests -k hpat.tests.test_series.TestSeries.test_series_pct_change + Test: python -m sdc.runtests -k sdc.tests.test_series.TestSeries.test_series_pct_change Parameters ----------- diff --git a/hpat/datatypes/hpat_pandas_seriesgroupby_functions.py b/sdc/datatypes/hpat_pandas_seriesgroupby_functions.py similarity index 95% rename from hpat/datatypes/hpat_pandas_seriesgroupby_functions.py rename to sdc/datatypes/hpat_pandas_seriesgroupby_functions.py index 4a3d1fdb8..0a0d37bbb 100644 --- a/hpat/datatypes/hpat_pandas_seriesgroupby_functions.py +++ b/sdc/datatypes/hpat_pandas_seriesgroupby_functions.py @@ -39,7 +39,7 @@ from numba.extending import overload_method from numba.errors import TypingError -from hpat.datatypes.hpat_pandas_seriesgroupby_types import SeriesGroupByType +from sdc.datatypes.hpat_pandas_seriesgroupby_types import SeriesGroupByType @overload_method(SeriesGroupByType, 'count') @@ -49,7 +49,7 @@ def hpat_pandas_seriesgroupby_count(self): .. 
only:: developer - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_groupby_count + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_groupby_count Parameters ----------- diff --git a/hpat/datatypes/hpat_pandas_seriesgroupby_types.py b/sdc/datatypes/hpat_pandas_seriesgroupby_types.py similarity index 98% rename from hpat/datatypes/hpat_pandas_seriesgroupby_types.py rename to sdc/datatypes/hpat_pandas_seriesgroupby_types.py index 7d840f021..7509c6e87 100644 --- a/hpat/datatypes/hpat_pandas_seriesgroupby_types.py +++ b/sdc/datatypes/hpat_pandas_seriesgroupby_types.py @@ -99,7 +99,7 @@ class SeriesGroupByTypeModel(StructModel): Model for SeriesGroupByType type All members must be the same as main type for this model - Test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_groupby_count + Test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_groupby_count """ def __init__(self, dmm, fe_type): diff --git a/hpat/datatypes/hpat_pandas_stringmethods_functions.py b/sdc/datatypes/hpat_pandas_stringmethods_functions.py similarity index 92% rename from hpat/datatypes/hpat_pandas_stringmethods_functions.py rename to sdc/datatypes/hpat_pandas_stringmethods_functions.py index 06f1583f4..daeda14e0 100644 --- a/hpat/datatypes/hpat_pandas_stringmethods_functions.py +++ b/sdc/datatypes/hpat_pandas_stringmethods_functions.py @@ -73,7 +73,7 @@ def hpat_pandas_stringmethods_upper_impl(self): return hpat_pandas_stringmethods_upper_impl - Test: python -m hpat.runtests hpat.tests.test_hiframes.TestHiFrames.test_str_split_filter + Test: python -m sdc.runtests sdc.tests.test_hiframes.TestHiFrames.test_str_split_filter """ @@ -86,8 +86,8 @@ def hpat_pandas_stringmethods_upper_impl(self): from numba.extending import overload_method from numba.errors import TypingError -from hpat.datatypes.hpat_pandas_stringmethods_types import StringMethodsType -from hpat.str_arr_ext import to_string_list +from 
sdc.datatypes.hpat_pandas_stringmethods_types import StringMethodsType +from sdc.str_arr_ext import to_string_list _hpat_pandas_stringmethods_autogen_global_dict = { @@ -147,12 +147,12 @@ def hpat_pandas_stringmethods_{methodname}(self{methodparams}): .. only:: developer - Test: python -m hpat.runtests hpat.tests.test_strings.TestStrings.test_str2str - python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_str2str - python -m hpat.runtests hpat.tests.test_hiframes.TestHiFrames.test_str_get - python -m hpat.runtests hpat.tests.test_hiframes.TestHiFrames.test_str_replace_noregex - python -m hpat.runtests hpat.tests.test_hiframes.TestHiFrames.test_str_split - python -m hpat.runtests hpat.tests.test_hiframes.TestHiFrames.test_str_contains_regex + Test: python -m sdc.runtests sdc.tests.test_strings.TestStrings.test_str2str + python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_str2str + python -m sdc.runtests sdc.tests.test_hiframes.TestHiFrames.test_str_get + python -m sdc.runtests sdc.tests.test_hiframes.TestHiFrames.test_str_replace_noregex + python -m sdc.runtests sdc.tests.test_hiframes.TestHiFrames.test_str_split + python -m sdc.runtests sdc.tests.test_hiframes.TestHiFrames.test_str_contains_regex Parameters ---------- diff --git a/hpat/datatypes/hpat_pandas_stringmethods_types.py b/sdc/datatypes/hpat_pandas_stringmethods_types.py similarity index 100% rename from hpat/datatypes/hpat_pandas_stringmethods_types.py rename to sdc/datatypes/hpat_pandas_stringmethods_types.py diff --git a/hpat/decorators.py b/sdc/decorators.py similarity index 90% rename from hpat/decorators.py rename to sdc/decorators.py index 9a4587785..f5fb02e51 100644 --- a/hpat/decorators.py +++ b/sdc/decorators.py @@ -29,7 +29,7 @@ ''' import numba -import hpat +import sdc def jit(signature_or_function=None, **options): @@ -40,7 +40,7 @@ def jit(signature_or_function=None, **options): ''' options['nopython'] = True - if not hpat.config.config_pipeline_hpat_default: + 
if not sdc.config.config_pipeline_hpat_default: ''' Use Numba compiler pipeline ''' @@ -81,13 +81,13 @@ def jit(signature_or_function=None, **options): # Option MPI is boolean and true by default # it means MPI transport will be used - mpi_transport_requested = options.pop('MPI', hpat.config.config_transport_mpi_default) + mpi_transport_requested = options.pop('MPI', sdc.config.config_transport_mpi_default) if not isinstance(mpi_transport_requested, (int, bool)): raise ValueError("Option MPI or SDC_CONFIG_MPI environment variable should be boolean") if mpi_transport_requested: - hpat.config.config_transport_mpi = True + sdc.config.config_transport_mpi = True else: - hpat.config.config_transport_mpi = False + sdc.config.config_transport_mpi = False - return numba.jit(signature_or_function, pipeline_class=hpat.compiler.SDCPipeline, **options) + return numba.jit(signature_or_function, pipeline_class=sdc.compiler.SDCPipeline, **options) diff --git a/hpat/dict_ext.py b/sdc/dict_ext.py similarity index 99% rename from hpat/dict_ext.py rename to sdc/dict_ext.py index 38134acc4..a20b5ccc2 100644 --- a/hpat/dict_ext.py +++ b/sdc/dict_ext.py @@ -36,12 +36,12 @@ from numba.extending import lower_builtin, overload_method, overload from numba.targets.imputils import (impl_ret_new_ref, impl_ret_borrowed, iternext_impl, RefType) -from hpat.str_ext import string_type, gen_unicode_to_std_str, gen_std_str_to_unicode +from sdc.str_ext import string_type, gen_unicode_to_std_str, gen_std_str_to_unicode from numba import cgutils from llvmlite import ir as lir import llvmlite.binding as ll from . 
import hdict_ext -from hpat.utils import unliteral_all +from sdc.utils import unliteral_all ll_voidp = lir.IntType(8).as_pointer() diff --git a/hpat/distributed.py b/sdc/distributed.py similarity index 93% rename from hpat/distributed.py rename to sdc/distributed.py index fd008686e..733f8213c 100644 --- a/hpat/distributed.py +++ b/sdc/distributed.py @@ -76,15 +76,15 @@ from numba.compiler_machinery import FunctionPass, register_pass -import hpat -import hpat.utils -from hpat import distributed_api, distributed_lower -from hpat.io.pio_api import h5file_type, h5group_type -from hpat.str_ext import string_type -from hpat.str_arr_ext import string_array_type -from hpat.distributed_api import Reduce_Type -from hpat.distributed_analysis import Distribution, DistributedAnalysis -from hpat.utils import ( +import sdc +import sdc.utils +from sdc import distributed_api, distributed_lower +from sdc.io.pio_api import h5file_type, h5group_type +from sdc.str_ext import string_type +from sdc.str_arr_ext import string_array_type +from sdc.distributed_api import Reduce_Type +from sdc.distributed_analysis import Distribution, DistributedAnalysis +from sdc.utils import ( is_alloc_callname, is_whole_slice, is_array_container, @@ -98,7 +98,7 @@ is_call, is_const_slice, update_globals) -from hpat.hiframes.pd_dataframe_ext import DataFrameType +from sdc.hiframes.pd_dataframe_ext import DataFrameType distributed_run_extensions = {} @@ -175,7 +175,7 @@ def run_pass(self): dprint_func_ir(self.state.func_ir, "after distributed pass") lower_parfor_sequential( self.state.typingctx, self.state.func_ir, self.state.typemap, self.state.calltypes) - if hpat.multithread_mode: + if sdc.multithread_mode: # parfor params need to be updated for multithread_mode since some # new variables like alloc_start are introduced by distributed pass # and are used in later parfors @@ -365,8 +365,8 @@ def _run_expr(self, inst, namevar_table): def _gen_1D_Var_len(self, arr): def f(A, op): # pragma: no cover c = 
len(A) - res = hpat.distributed_api.dist_reduce(c, op) - f_block = compile_to_numba_ir(f, {'hpat': hpat}, self.state.typingctx, + res = sdc.distributed_api.dist_reduce(c, op) + f_block = compile_to_numba_ir(f, {'sdc': sdc}, self.state.typingctx, (self.state.typemap[arr.name], types.int32), self.state.typemap, self.state.calltypes).blocks.popitem()[1] replace_arg_nodes( @@ -390,11 +390,11 @@ def _gen_dist_inits(self): self.state.typemap[self._set0_var.name] = types.int64 set0_assign = ir.Assign(ir.Const(0, loc), self._set0_var, loc) out.append(set0_assign) - # g_dist_var = Global(hpat.distributed_api) + # g_dist_var = Global(sdc.distributed_api) g_dist_var = ir.Var(scope, mk_unique_var("$distributed_g_var"), loc) self._g_dist_var = g_dist_var - self.state.typemap[g_dist_var.name] = types.misc.Module(hpat.distributed_api) - g_dist = ir.Global('distributed_api', hpat.distributed_api, loc) + self.state.typemap[g_dist_var.name] = types.misc.Module(sdc.distributed_api) + g_dist = ir.Global('distributed_api', sdc.distributed_api, loc) g_dist_assign = ir.Assign(g_dist, g_dist_var, loc) # attr call: rank_attr = getattr(g_dist_var, get_rank) rank_attr_call = ir.Expr.getattr(g_dist_var, "get_rank", loc) @@ -402,7 +402,7 @@ def _gen_dist_inits(self): self.state.typemap[rank_attr_var.name] = get_global_func_typ( distributed_api.get_rank) rank_attr_assign = ir.Assign(rank_attr_call, rank_attr_var, loc) - # rank_var = hpat.distributed_api.get_rank() + # rank_var = sdc.distributed_api.get_rank() rank_var = ir.Var(scope, mk_unique_var("$rank"), loc) self.state.typemap[rank_var.name] = types.int32 rank_call = ir.Expr.call(rank_attr_var, [], (), loc) @@ -418,7 +418,7 @@ def _gen_dist_inits(self): self.state.typemap[size_attr_var.name] = get_global_func_typ( distributed_api.get_size) size_attr_assign = ir.Assign(size_attr_call, size_attr_var, loc) - # size_var = hpat.distributed_api.get_size() + # size_var = sdc.distributed_api.get_size() size_var = ir.Var(scope, 
mk_unique_var("$dist_size"), loc) self.state.typemap[size_var.name] = types.int32 size_call = ir.Expr.call(size_attr_var, [], (), loc) @@ -511,7 +511,7 @@ def _run_call(self, assign): out[-1].target = assign.target self.oneDVar_len_vars[assign.target.name] = arr_var - if (hpat.config._has_h5py and (func_mod == 'hpat.io.pio_api' + if (sdc.config._has_h5py and (func_mod == 'sdc.io.pio_api' and func_name in ('h5read', 'h5write', 'h5read_filter')) and self._is_1D_arr(rhs.args[5].name)): # TODO: make create_dataset/create_group collective @@ -537,7 +537,7 @@ def _run_call(self, assign): file_varname = rhs.args[0].name self._file_open_set_parallel(file_varname) - if hpat.config._has_h5py and (func_mod == 'hpat.io.pio_api' + if sdc.config._has_h5py and (func_mod == 'sdc.io.pio_api' and func_name == 'get_filter_read_indices'): # out += self._gen_1D_Var_len(assign.target) @@ -550,8 +550,8 @@ def _run_call(self, assign): self._array_counts[lhs] = [count_var] out += g_out - if (hpat.config._has_pyarrow - and fdef == ('read_parquet', 'hpat.io.parquet_pio') + if (sdc.config._has_pyarrow + and fdef == ('read_parquet', 'sdc.io.parquet_pio') and self._is_1D_arr(rhs.args[2].name)): arr = rhs.args[2].name assert len(self._array_starts[arr]) == 1, "only 1D arrs in parquet" @@ -560,13 +560,13 @@ def _run_call(self, assign): rhs.args += [start_var, count_var] def f(fname, cindex, arr, out_dtype, start, count): # pragma: no cover - return hpat.io.parquet_pio.read_parquet_parallel(fname, cindex, + return sdc.io.parquet_pio.read_parquet_parallel(fname, cindex, arr, out_dtype, start, count) return self._replace_func(f, rhs.args) - if (hpat.config._has_pyarrow - and fdef == ('read_parquet_str', 'hpat.io.parquet_pio') + if (sdc.config._has_pyarrow + and fdef == ('read_parquet_str', 'sdc.io.parquet_pio') and self._is_1D_arr(lhs)): arr = lhs size_var = rhs.args[2] @@ -580,10 +580,10 @@ def f(fname, cindex, arr, out_dtype, start, count): # pragma: no cover rhs.args.append(count_var) def 
f(fname, cindex, start, count): # pragma: no cover - return hpat.io.parquet_pio.read_parquet_str_parallel(fname, cindex, + return sdc.io.parquet_pio.read_parquet_str_parallel(fname, cindex, start, count) - f_block = compile_to_numba_ir(f, {'hpat': hpat}, self.state.typingctx, + f_block = compile_to_numba_ir(f, {'sdc': sdc}, self.state.typingctx, (self.state.typemap[rhs.args[0].name], types.intp, types.intp, types.intp), self.state.typemap, self.state.calltypes).blocks.popitem()[1] @@ -592,7 +592,7 @@ def f(fname, cindex, start, count): # pragma: no cover out[-1].target = assign.target # TODO: fix numba.extending - if hpat.config._has_xenon and (fdef == ('read_xenon_col', 'numba.extending') + if sdc.config._has_xenon and (fdef == ('read_xenon_col', 'numba.extending') and self._is_1D_arr(rhs.args[3].name)): arr = rhs.args[3].name assert len(self._array_starts[arr]) == 1, "only 1D arrs in Xenon" @@ -601,12 +601,12 @@ def f(fname, cindex, start, count): # pragma: no cover rhs.args += [start_var, count_var] def f(connect_tp, dset_tp, col_id_tp, column_tp, schema_arr_tp, start, count): # pragma: no cover - return hpat.io.xenon_ext.read_xenon_col_parallel( + return sdc.io.xenon_ext.read_xenon_col_parallel( connect_tp, dset_tp, col_id_tp, column_tp, schema_arr_tp, start, count) return self._replace_func(f, rhs.args) - if hpat.config._has_xenon and (fdef == ('read_xenon_str', 'numba.extending') + if sdc.config._has_xenon and (fdef == ('read_xenon_str', 'numba.extending') and self._is_1D_arr(lhs)): arr = lhs size_var = rhs.args[3] @@ -621,14 +621,14 @@ def f(connect_tp, dset_tp, col_id_tp, column_tp, schema_arr_tp, start, count): rhs.args.append(count_var) def f(connect_tp, dset_tp, col_id_tp, schema_arr_tp, start_tp, count_tp): # pragma: no cover - return hpat.io.xenon_ext.read_xenon_str_parallel( + return sdc.io.xenon_ext.read_xenon_str_parallel( connect_tp, dset_tp, col_id_tp, schema_arr_tp, start_tp, count_tp) f_block = compile_to_numba_ir(f, - {'hpat': hpat}, + {'sdc': 
sdc}, self.state.typingctx, - (hpat.io.xenon_ext.xe_connect_type, - hpat.io.xenon_ext.xe_dset_type, + (sdc.io.xenon_ext.xe_connect_type, + sdc.io.xenon_ext.xe_dset_type, types.intp, self.state.typemap[rhs.args[3].name], types.intp, @@ -639,8 +639,8 @@ def f(connect_tp, dset_tp, col_id_tp, schema_arr_tp, start_tp, count_tp): # pra out += f_block.body[:-2] out[-1].target = assign.target - if (hpat.config._has_ros - and fdef == ('read_ros_images_inner', 'hpat.ros') + if (sdc.config._has_ros + and fdef == ('read_ros_images_inner', 'sdc.ros') and self._is_1D_arr(rhs.args[0].name)): arr = rhs.args[0].name assert len(self._array_starts[arr]) == 4, "only 4D arrs in ros" @@ -649,12 +649,12 @@ def f(connect_tp, dset_tp, col_id_tp, schema_arr_tp, start_tp, count_tp): # pra rhs.args += [start_var, count_var] def f(arr, bag, start, count): # pragma: no cover - return hpat.ros.read_ros_images_inner_parallel(arr, bag, + return sdc.ros.read_ros_images_inner_parallel(arr, bag, start, count) return self._replace_func(f, rhs.args) - if (func_mod == 'hpat.hiframes.api' and func_name in ( + if (func_mod == 'sdc.hiframes.api' and func_name in ( 'to_arr_from_series', 'ts_series_to_arr_typ', 'to_date_series_type', 'init_series') and self._is_1D_arr(rhs.args[0].name)): @@ -664,21 +664,21 @@ def f(arr, bag, start, count): # pragma: no cover self._array_counts[lhs] = self._array_counts[in_arr] self._array_sizes[lhs] = self._array_sizes[in_arr] - if (fdef == ('init_dataframe', 'hpat.hiframes.pd_dataframe_ext') + if (fdef == ('init_dataframe', 'sdc.hiframes.pd_dataframe_ext') and self._is_1D_arr(rhs.args[0].name)): in_arr = rhs.args[0].name self._array_starts[lhs] = self._array_starts[in_arr] self._array_counts[lhs] = self._array_counts[in_arr] self._array_sizes[lhs] = self._array_sizes[in_arr] - if (fdef == ('compute_split_view', 'hpat.hiframes.split_impl') + if (fdef == ('compute_split_view', 'sdc.hiframes.split_impl') and self._is_1D_arr(rhs.args[0].name)): in_arr = rhs.args[0].name 
self._array_starts[lhs] = self._array_starts[in_arr] self._array_counts[lhs] = self._array_counts[in_arr] self._array_sizes[lhs] = self._array_sizes[in_arr] - if (fdef == ('get_split_view_index', 'hpat.hiframes.split_impl') + if (fdef == ('get_split_view_index', 'sdc.hiframes.split_impl') and self._is_1D_arr(rhs.args[0].name)): arr = rhs.args[0] index_var = rhs.args[1] @@ -689,7 +689,7 @@ def f(arr, bag, start, count): # pragma: no cover out.append(assign) return out - if (fdef == ('setitem_str_arr_ptr', 'hpat.str_arr_ext') + if (fdef == ('setitem_str_arr_ptr', 'sdc.str_arr_ext') and self._is_1D_arr(rhs.args[0].name)): arr = rhs.args[0] index_var = rhs.args[1] @@ -700,7 +700,7 @@ def f(arr, bag, start, count): # pragma: no cover out.append(assign) return out - if (fdef == ('str_arr_item_to_numeric', 'hpat.str_arr_ext') + if (fdef == ('str_arr_item_to_numeric', 'sdc.str_arr_ext') and self._is_1D_arr(rhs.args[0].name)): # TODO: test parallel arr = rhs.args[0] @@ -719,7 +719,7 @@ def f(arr, bag, start, count): # pragma: no cover out.append(assign) return out - if fdef == ('isna', 'hpat.hiframes.api') and self._is_1D_arr(rhs.args[0].name): + if fdef == ('isna', 'sdc.hiframes.api') and self._is_1D_arr(rhs.args[0].name): # fix index in call to isna arr = rhs.args[0] ind = rhs.args[1] @@ -727,7 +727,7 @@ def f(arr, bag, start, count): # pragma: no cover rhs.args[1] = out[-1].target out.append(assign) - if fdef == ('rolling_fixed', 'hpat.hiframes.rolling') and ( + if fdef == ('rolling_fixed', 'sdc.hiframes.rolling') and ( self._is_1D_arr(rhs.args[0].name) or self._is_1D_Var_arr(rhs.args[0].name)): in_arr = rhs.args[0].name @@ -741,7 +741,7 @@ def f(arr, bag, start, count): # pragma: no cover rhs.args[3] = true_var out = [ir.Assign(ir.Const(True, loc), true_var, loc), assign] - if fdef == ('rolling_variable', 'hpat.hiframes.rolling') and ( + if fdef == ('rolling_variable', 'sdc.hiframes.rolling') and ( self._is_1D_arr(rhs.args[0].name) or 
self._is_1D_Var_arr(rhs.args[0].name)): in_arr = rhs.args[0].name @@ -755,7 +755,7 @@ def f(arr, bag, start, count): # pragma: no cover rhs.args[4] = true_var out = [ir.Assign(ir.Const(True, loc), true_var, loc), assign] - if (func_mod == 'hpat.hiframes.rolling' + if (func_mod == 'sdc.hiframes.rolling' and func_name in ('shift', 'pct_change') and (self._is_1D_arr(rhs.args[0].name) or self._is_1D_Var_arr(rhs.args[0].name))): @@ -770,7 +770,7 @@ def f(arr, bag, start, count): # pragma: no cover rhs.args[2] = true_var out = [ir.Assign(ir.Const(True, loc), true_var, loc), assign] - if fdef == ('quantile', 'hpat.hiframes.api') and (self._is_1D_arr(rhs.args[0].name) + if fdef == ('quantile', 'sdc.hiframes.api') and (self._is_1D_arr(rhs.args[0].name) or self._is_1D_Var_arr(rhs.args[0].name)): arr = rhs.args[0].name if arr in self._array_sizes: @@ -782,63 +782,63 @@ def f(arr, bag, start, count): # pragma: no cover rhs.args += [size_var] def f(arr, q, size): - return hpat.hiframes.api.quantile_parallel(arr, q, size) + return sdc.hiframes.api.quantile_parallel(arr, q, size) return self._replace_func(f, rhs.args) if fdef == ( - 'nunique', 'hpat.hiframes.api') and ( + 'nunique', 'sdc.hiframes.api') and ( self._is_1D_arr( rhs.args[0].name) or self._is_1D_Var_arr( rhs.args[0].name)): def f(arr): - return hpat.hiframes.api.nunique_parallel(arr) + return sdc.hiframes.api.nunique_parallel(arr) return self._replace_func(f, rhs.args) if fdef == ( - 'unique', 'hpat.hiframes.api') and ( + 'unique', 'sdc.hiframes.api') and ( self._is_1D_arr( rhs.args[0].name) or self._is_1D_Var_arr( rhs.args[0].name)): def f(arr): - return hpat.hiframes.api.unique_parallel(arr) + return sdc.hiframes.api.unique_parallel(arr) return self._replace_func(f, rhs.args) if fdef == ( - 'nlargest', 'hpat.hiframes.api') and ( + 'nlargest', 'sdc.hiframes.api') and ( self._is_1D_arr( rhs.args[0].name) or self._is_1D_Var_arr( rhs.args[0].name)): def f(arr, k, i, f): - return hpat.hiframes.api.nlargest_parallel(arr, 
k, i, f) + return sdc.hiframes.api.nlargest_parallel(arr, k, i, f) return self._replace_func(f, rhs.args) if fdef == ( - 'median', 'hpat.hiframes.api') and ( + 'median', 'sdc.hiframes.api') and ( self._is_1D_arr( rhs.args[0].name) or self._is_1D_Var_arr( rhs.args[0].name)): def f(arr): - return hpat.hiframes.api.median(arr, True) + return sdc.hiframes.api.median(arr, True) return self._replace_func(f, rhs.args) - if fdef == ('convert_rec_to_tup', 'hpat.hiframes.api'): + if fdef == ('convert_rec_to_tup', 'sdc.hiframes.api'): # optimize Series back to back map pattern with tuples # TODO: create another optimization pass? arg_def = guard(get_definition, self.state.func_ir, rhs.args[0]) if (is_call(arg_def) and guard(find_callname, self.state.func_ir, arg_def) - == ('convert_tup_to_rec', 'hpat.hiframes.api')): + == ('convert_tup_to_rec', 'sdc.hiframes.api')): assign.value = arg_def.args[0] return out - if fdef == ('dist_return', 'hpat.distributed_api'): + if fdef == ('dist_return', 'sdc.distributed_api'): # always rebalance returned distributed arrays # TODO: need different flag for 1D_Var return (distributed_var)? # TODO: rebalance strings? @@ -846,9 +846,9 @@ def f(arr): assign.value = rhs.args[0] return [assign] - if ((fdef == ('get_series_data', 'hpat.hiframes.api') - or fdef == ('get_series_index', 'hpat.hiframes.api') - or fdef == ('get_dataframe_data', 'hpat.hiframes.pd_dataframe_ext'))): + if ((fdef == ('get_series_data', 'sdc.hiframes.api') + or fdef == ('get_series_index', 'sdc.hiframes.api') + or fdef == ('get_dataframe_data', 'sdc.hiframes.pd_dataframe_ext'))): out = [assign] arr = assign.target # gen len() using 1D_Var reduce approach. 
@@ -871,31 +871,31 @@ def f(arr): return out - if fdef == ('threaded_return', 'hpat.distributed_api'): + if fdef == ('threaded_return', 'sdc.distributed_api'): assign.value = rhs.args[0] return [assign] - if fdef == ('rebalance_array', 'hpat.distributed_api'): + if fdef == ('rebalance_array', 'sdc.distributed_api'): return self._run_call_rebalance_array(lhs, assign, rhs.args) # output of mnb.predict is 1D with same size as 1st dimension of input # TODO: remove ml module and use new DAAL API if func_name == 'predict': getattr_call = guard(get_definition, self.state.func_ir, func_var) - if (getattr_call and self.state.typemap[getattr_call.value.name] == hpat.ml.naive_bayes.mnb_type): + if (getattr_call and self.state.typemap[getattr_call.value.name] == sdc.ml.naive_bayes.mnb_type): in_arr = rhs.args[0].name self._array_starts[lhs] = [self._array_starts[in_arr][0]] self._array_counts[lhs] = [self._array_counts[in_arr][0]] self._array_sizes[lhs] = [self._array_sizes[in_arr][0]] - if fdef == ('file_read', 'hpat.io.np_io') and rhs.args[1].name in self._array_starts: + if fdef == ('file_read', 'sdc.io.np_io') and rhs.args[1].name in self._array_starts: _fname = rhs.args[0] _data_ptr = rhs.args[1] _start = self._array_starts[_data_ptr.name][0] _count = self._array_counts[_data_ptr.name][0] def f(fname, data_ptr, start, count): # pragma: no cover - return hpat.io.np_io.file_read_parallel(fname, data_ptr, start, count) + return sdc.io.np_io.file_read_parallel(fname, data_ptr, start, count) return self._replace_func(f, [_fname, _data_ptr, _start, _count]) return out @@ -906,7 +906,7 @@ def _run_call_np(self, lhs, func_name, assign, args): # allocs are handled separately is_1D_bool = (self._is_1D_Var_arr(lhs) or self._is_1D_arr(lhs)) err_str = "allocation calls handled separately 'empty', 'zeros', 'ones', 'full' etc." 
- assert not (is_1D_bool and func_name in hpat.utils.np_alloc_callnames), err_str + assert not (is_1D_bool and func_name in sdc.utils.np_alloc_callnames), err_str out = [assign] scope = assign.target.scope @@ -1030,7 +1030,7 @@ def _run_call_array(self, lhs, arr, func_name, assign, args): _count = self._array_counts[arr.name][0] def f(fname, arr, start, count): # pragma: no cover - return hpat.io.np_io.file_write_parallel(fname, arr, start, count) + return sdc.io.np_io.file_write_parallel(fname, arr, start, count) return self._replace_func(f, [_fname, arr, _start, _count]) @@ -1039,8 +1039,8 @@ def f(fname, arr, start, count): # pragma: no cover def f(fname, arr): # pragma: no cover count = len(arr) - start = hpat.distributed_api.dist_exscan(count) - return hpat.io.np_io.file_write_parallel(fname, arr, start, count) + start = sdc.distributed_api.dist_exscan(count) + return sdc.io.np_io.file_write_parallel(fname, arr, start, count) return self._replace_func(f, [_fname, arr]) @@ -1057,7 +1057,7 @@ def _run_call_df(self, lhs, df, func_name, assign, args): # df2 = df(index=range(index_start, index_start+l)) # header = header and is_root # only first line has header # str_out = df2.to_csv(None, header=header) - # hpat.io.np_io._file_write_parallel(fname, str_out) + # sdc.io.np_io._file_write_parallel(fname, str_out) df_typ = self.state.typemap[df.name] rhs = assign.value @@ -1128,8 +1128,8 @@ def _run_call_df(self, lhs, df, func_name, assign, args): def f(fname, str_out): # pragma: no cover count = len(str_out) - start = hpat.distributed_api.dist_exscan(count) - hpat.io.np_io._file_write_parallel( + start = sdc.distributed_api.dist_exscan(count) + sdc.io.np_io._file_write_parallel( fname._data, str_out._data, start, count, 1) dummy_use(str_out) @@ -1140,8 +1140,8 @@ def f(fname, str_out): # pragma: no cover def _gen_is_root_and_cond(self, cond_var): def f(cond): - return cond & (hpat.distributed_api.get_rank() == 0) - f_block = compile_to_numba_ir(f, {'hpat': hpat}, + 
return cond & (sdc.distributed_api.get_rank() == 0) + f_block = compile_to_numba_ir(f, {'sdc': sdc}, self.state.typingctx, (self.state.typemap[cond_var.name],), self.state.typemap, @@ -1153,12 +1153,12 @@ def f(cond): def _fix_parallel_df_index(self, df): def f(df): # pragma: no cover length = len(df) - start = hpat.distributed_api.dist_exscan(length) + start = sdc.distributed_api.dist_exscan(length) ind = np.arange(start, start + length) - df2 = hpat.hiframes.pd_dataframe_ext.set_df_index(df, ind) + df2 = sdc.hiframes.pd_dataframe_ext.set_df_index(df, ind) return df2 - f_block = compile_to_numba_ir(f, {'hpat': hpat, 'np': np}, + f_block = compile_to_numba_ir(f, {'sdc': sdc, 'np': np}, self.state.typingctx, (self.state.typemap[df.name],), self.state.typemap, @@ -1172,9 +1172,9 @@ def _run_permutation_int(self, assign, args): n = args[0] def f(lhs, n): - hpat.distributed_lower.dist_permutation_int(lhs, n) + sdc.distributed_lower.dist_permutation_int(lhs, n) - f_block = compile_to_numba_ir(f, {'hpat': hpat}, + f_block = compile_to_numba_ir(f, {'sdc': sdc}, self.state.typingctx, (self.state.typemap[lhs.name], types.intp), self.state.typemap, @@ -1199,10 +1199,10 @@ def _run_permutation_array_index(self, lhs, rhs, idx): *self._array_sizes[lhs.name][1:]), dtype, scope, loc) def f(lhs, lhs_len, dtype_size, rhs, idx, idx_len): - hpat.distributed_lower.dist_permutation_array_index( + sdc.distributed_lower.dist_permutation_array_index( lhs, lhs_len, dtype_size, rhs, idx, idx_len) - f_block = compile_to_numba_ir(f, {'hpat': hpat}, + f_block = compile_to_numba_ir(f, {'sdc': sdc}, self.state.typingctx, (self.state.typemap[lhs.name], types.intp, @@ -1242,10 +1242,10 @@ def _run_reshape(self, assign, in_arr, args): new_local_shape_var, dtype, scope, loc) def f(lhs, in_arr, new_0dim_global_len, old_0dim_global_len, dtype_size): # pragma: no cover - hpat.distributed_lower.dist_oneD_reshape_shuffle( + sdc.distributed_lower.dist_oneD_reshape_shuffle( lhs, in_arr, 
new_0dim_global_len, old_0dim_global_len, dtype_size) - f_block = compile_to_numba_ir(f, {'hpat': hpat}, + f_block = compile_to_numba_ir(f, {'sdc': sdc}, self.state.typingctx, (self.state.typemap[lhs.name], self.state.typemap[in_arr.name], types.intp, types.intp, types.intp), @@ -1293,9 +1293,9 @@ def _run_call_rebalance_array(self, lhs, assign, args): self._array_sizes[lhs][0] = total_length def f(arr, count): # pragma: no cover - b_arr = hpat.distributed_api.rebalance_array_parallel(arr, count) + b_arr = sdc.distributed_api.rebalance_array_parallel(arr, count) - f_block = compile_to_numba_ir(f, {'hpat': hpat}, self.state.typingctx, + f_block = compile_to_numba_ir(f, {'sdc': sdc}, self.state.typingctx, (self.state.typemap[arr.name], types.intp), self.state.typemap, self.state.calltypes).blocks.popitem()[1] replace_arg_nodes(f_block, [arr, count_var]) @@ -1425,7 +1425,7 @@ def _fix_1D_Var_alloc(self, size_var, lhs, scope, loc): def f(oneD_var_arr): # pragma: no cover arr_len = len(oneD_var_arr) - f_block = compile_to_numba_ir(f, {'hpat': hpat}, self.state.typingctx, + f_block = compile_to_numba_ir(f, {'sdc': sdc}, self.state.typingctx, (self.state.typemap[arr_var.name],), self.state.typemap, self.state.calltypes).blocks.popitem()[1] replace_arg_nodes(f_block, [arr_var]) @@ -1458,7 +1458,7 @@ def f(oneD_var_arr): # pragma: no cover def f(oneD_var_arr): # pragma: no cover arr_len = len(oneD_var_arr) - f_block = compile_to_numba_ir(f, {'hpat': hpat}, self.state.typingctx, + f_block = compile_to_numba_ir(f, {'sdc': sdc}, self.state.typingctx, (self.state.typemap[arr_var.name],), self.state.typemap, self.state.calltypes).blocks.popitem()[1] replace_arg_nodes(f_block, [arr_var]) @@ -1584,7 +1584,7 @@ def _run_getsetitem(self, arr, index_var, node, full_node): if isinstance(self.state.typemap[index_var.name], types.Integer): def f(A, val, index, chunk_start, chunk_count): # pragma: no cover - hpat.distributed_lower._set_if_in_range( + 
sdc.distributed_lower._set_if_in_range( A, val, index, chunk_start, chunk_count) return self._replace_func( @@ -1596,7 +1596,7 @@ def f(A, val, index, chunk_start, chunk_count): # pragma: no cover # convert setitem with global range to setitem with local range # that overlaps with the local array chunk def f(A, val, start, stop, chunk_start, chunk_count): # pragma: no cover - loc_start, loc_stop = hpat.distributed_lower._get_local_range( + loc_start, loc_stop = sdc.distributed_lower._get_local_range( start, stop, chunk_start, chunk_count) A[loc_start:loc_stop] = val @@ -1687,7 +1687,7 @@ def f(A, start, step): start = self._array_starts[in_arr.name][0] count = self._array_counts[in_arr.name][0] return self._replace_func( - lambda arr, slice_index, start, count: hpat.distributed_api.const_slice_getitem( + lambda arr, slice_index, start, count: sdc.distributed_api.const_slice_getitem( arr, slice_index, start, count), [in_arr, index_var, start, count]) return out @@ -1697,7 +1697,7 @@ def _run_parfor(self, parfor, namevar_table): # parfor, self.state.typemap) # Thread and 1D parfors turn to gufunc in multithread mode - if (hpat.multithread_mode + if (sdc.multithread_mode and self._dist_analysis.parfor_dists[parfor.id] != Distribution.REP): parfor.no_sequential_lowering = True @@ -1770,7 +1770,7 @@ def _run_parfor_1D_Var(self, parfor, namevar_table): def f(A): # pragma: no cover arr_len = len(A) - f_block = compile_to_numba_ir(f, {'hpat': hpat}, self.state.typingctx, + f_block = compile_to_numba_ir(f, {'sdc': sdc}, self.state.typingctx, (self.state.typemap[arr_var.name],), self.state.typemap, self.state.calltypes).blocks.popitem()[1] replace_arg_nodes(f_block, [arr_var]) @@ -1803,12 +1803,12 @@ def f(A): # pragma: no cover l_nest.start = start_var def _fix_ind_bounds(start, stop): - prefix = hpat.distributed_api.dist_exscan(stop - start) - # rank = hpat.distributed_api.get_rank() + prefix = sdc.distributed_api.dist_exscan(stop - start) + # rank = 
sdc.distributed_api.get_rank() # print(rank, prefix, start, stop) return start + prefix, stop + prefix - f_block = compile_to_numba_ir(_fix_ind_bounds, {'hpat': hpat}, + f_block = compile_to_numba_ir(_fix_ind_bounds, {'sdc': sdc}, self.state.typingctx, (types.intp, types.intp), self.state.typemap, self.state.calltypes).blocks.popitem()[1] replace_arg_nodes(f_block, [l_nest.start, l_nest.stop]) @@ -1842,8 +1842,8 @@ def _run_arg(self, assign): return None # TODO: comprehensive support for Series vars - from hpat.hiframes.pd_series_ext import SeriesType - if isinstance(typ, (SeriesType, hpat.hiframes.pd_dataframe_ext.DataFrameType)): + from sdc.hiframes.pd_series_ext import SeriesType + if isinstance(typ, (SeriesType, sdc.hiframes.pd_dataframe_ext.DataFrameType)): return None # gen len() using 1D_Var reduce approach. @@ -2063,9 +2063,9 @@ def _file_open_set_parallel(self, file_varname): def _gen_barrier(self): def f(): # pragma: no cover - return hpat.distributed_api.barrier() + return sdc.distributed_api.barrier() - f_blocks = compile_to_numba_ir(f, {'hpat': hpat}, self.state.typingctx, {}, + f_blocks = compile_to_numba_ir(f, {'sdc': sdc}, self.state.typingctx, {}, self.state.typemap, self.state.calltypes).blocks block = f_blocks[min(f_blocks.keys())] return block.body[:-2] # remove return @@ -2076,10 +2076,10 @@ def _gen_reduce(self, reduce_var, reduce_op, scope, loc): op_assign = ir.Assign(ir.Const(reduce_op.value, loc), op_var, loc) def f(val, op): # pragma: no cover - hpat.distributed_api.dist_reduce(val, op) + sdc.distributed_api.dist_reduce(val, op) f_ir = compile_to_numba_ir(f, - {'hpat': hpat}, + {'sdc': sdc}, self.state.typingctx, (self.state.typemap[reduce_var.name], types.int32), self.state.typemap, @@ -2150,13 +2150,13 @@ def _gen_init_reduce(self, reduce_var, reduce_op): pre_init_val = "v = np.full_like(s, {}, s.dtype)".format(init_val) init_val = "v" - f_text = "def f(s):\n {}\n s = hpat.distributed_lower._root_rank_select(s, {})".format(pre_init_val, 
init_val) + f_text = "def f(s):\n {}\n s = sdc.distributed_lower._root_rank_select(s, {})".format(pre_init_val, init_val) loc_vars = {} - exec(f_text, {'hpat': hpat}, loc_vars) + exec(f_text, {'sdc': sdc}, loc_vars) f = loc_vars['f'] f_block = compile_to_numba_ir(f, - {'hpat': hpat, 'numba': numba, 'np': np}, + {'sdc': sdc, 'numba': numba, 'np': np}, self.state.typingctx, (red_var_typ,), self.state.typemap, @@ -2217,7 +2217,7 @@ def _get_arg(self, f_name, args, kws, arg_no, arg_name, default=None, err_msg=No return arg def _replace_func(self, func, args, const=False, pre_nodes=None, extra_globals=None): - glbls = {'numba': numba, 'np': np, 'hpat': hpat} + glbls = {'numba': numba, 'np': np, 'sdc': sdc} if extra_globals is not None: glbls.update(extra_globals) arg_typs = tuple(self.state.typemap[v.name] for v in args) diff --git a/hpat/distributed_analysis.py b/sdc/distributed_analysis.py similarity index 92% rename from hpat/distributed_analysis.py rename to sdc/distributed_analysis.py index 592f5826d..444822859 100644 --- a/hpat/distributed_analysis.py +++ b/sdc/distributed_analysis.py @@ -40,15 +40,15 @@ from numba.parfor import wrap_parfor_blocks, unwrap_parfor_blocks import numpy as np -import hpat -import hpat.io -import hpat.io.np_io -from hpat.hiframes.pd_series_ext import SeriesType -from hpat.utils import (get_constant, is_alloc_callname, +import sdc +import sdc.io +import sdc.io.np_io +from sdc.hiframes.pd_series_ext import SeriesType +from sdc.utils import (get_constant, is_alloc_callname, is_whole_slice, is_array, is_array_container, is_np_array, find_build_tuple, debug_prints, is_const_slice) -from hpat.hiframes.pd_dataframe_ext import DataFrameType +from sdc.hiframes.pd_dataframe_ext import DataFrameType from enum import Enum @@ -318,8 +318,8 @@ def _analyze_call(self, lhs, rhs, func_var, args, array_dists): self._analyze_call_df(lhs, func_mod, func_name, args, array_dists) return - # hpat.distributed_api functions - if isinstance(func_mod, str) and 
func_mod == 'hpat.distributed_api': + # sdc.distributed_api functions + if isinstance(func_mod, str) and func_mod == 'sdc.distributed_api': self._analyze_call_hpat_dist(lhs, func_name, args, array_dists) return @@ -327,25 +327,25 @@ def _analyze_call(self, lhs, rhs, func_var, args, array_dists): if func_name == 'len' and func_mod in ('__builtin__', 'builtins'): return - if hpat.config._has_h5py and (func_mod == 'hpat.io.pio_api' + if sdc.config._has_h5py and (func_mod == 'sdc.io.pio_api' and func_name in ('h5read', 'h5write', 'h5read_filter')): return - if hpat.config._has_h5py and (func_mod == 'hpat.io.pio_api' + if sdc.config._has_h5py and (func_mod == 'sdc.io.pio_api' and func_name == 'get_filter_read_indices'): if lhs not in array_dists: array_dists[lhs] = Distribution.OneD return - if fdef == ('quantile', 'hpat.hiframes.api'): + if fdef == ('quantile', 'sdc.hiframes.api'): # quantile doesn't affect input's distribution return - if fdef == ('nunique', 'hpat.hiframes.api'): + if fdef == ('nunique', 'sdc.hiframes.api'): # nunique doesn't affect input's distribution return - if fdef == ('unique', 'hpat.hiframes.api'): + if fdef == ('unique', 'sdc.hiframes.api'): # doesn't affect distribution of input since input can stay 1D if lhs not in array_dists: array_dists[lhs] = Distribution.OneD_Var @@ -355,46 +355,46 @@ def _analyze_call(self, lhs, rhs, func_var, args, array_dists): array_dists[lhs] = new_dist return - if fdef == ('rolling_fixed', 'hpat.hiframes.rolling'): + if fdef == ('rolling_fixed', 'sdc.hiframes.rolling'): self._meet_array_dists(lhs, rhs.args[0].name, array_dists) return - if fdef == ('rolling_variable', 'hpat.hiframes.rolling'): + if fdef == ('rolling_variable', 'sdc.hiframes.rolling'): # lhs, in_arr, on_arr should have the same distribution new_dist = self._meet_array_dists(lhs, rhs.args[0].name, array_dists) new_dist = self._meet_array_dists(lhs, rhs.args[1].name, array_dists, new_dist) array_dists[rhs.args[0].name] = new_dist return - if fdef == 
('shift', 'hpat.hiframes.rolling'): + if fdef == ('shift', 'sdc.hiframes.rolling'): self._meet_array_dists(lhs, rhs.args[0].name, array_dists) return - if fdef == ('pct_change', 'hpat.hiframes.rolling'): + if fdef == ('pct_change', 'sdc.hiframes.rolling'): self._meet_array_dists(lhs, rhs.args[0].name, array_dists) return - if fdef == ('nlargest', 'hpat.hiframes.api'): + if fdef == ('nlargest', 'sdc.hiframes.api'): # output of nlargest is REP array_dists[lhs] = Distribution.REP return - if fdef == ('median', 'hpat.hiframes.api'): + if fdef == ('median', 'sdc.hiframes.api'): return - if fdef == ('concat', 'hpat.hiframes.api'): + if fdef == ('concat', 'sdc.hiframes.api'): # hiframes concat is similar to np.concatenate self._analyze_call_np_concatenate(lhs, args, array_dists) return - if fdef == ('isna', 'hpat.hiframes.api'): + if fdef == ('isna', 'sdc.hiframes.api'): return - if fdef == ('get_series_name', 'hpat.hiframes.api'): + if fdef == ('get_series_name', 'sdc.hiframes.api'): return # dummy hiframes functions - if func_mod == 'hpat.hiframes.api' and func_name in ('get_series_data', + if func_mod == 'sdc.hiframes.api' and func_name in ('get_series_data', 'get_series_index', 'to_arr_from_series', 'ts_series_to_arr_typ', 'to_date_series_type', 'dummy_unbox_series', @@ -403,7 +403,7 @@ def _analyze_call(self, lhs, rhs, func_var, args, array_dists): self._meet_array_dists(lhs, rhs.args[0].name, array_dists) return - if fdef == ('init_series', 'hpat.hiframes.api'): + if fdef == ('init_series', 'sdc.hiframes.api'): # lhs, in_arr, and index should have the same distribution new_dist = self._meet_array_dists(lhs, rhs.args[0].name, array_dists) if len(rhs.args) > 1 and self.typemap[rhs.args[1].name] != types.none: @@ -411,7 +411,7 @@ def _analyze_call(self, lhs, rhs, func_var, args, array_dists): array_dists[rhs.args[0].name] = new_dist return - if fdef == ('init_dataframe', 'hpat.hiframes.pd_dataframe_ext'): + if fdef == ('init_dataframe', 
'sdc.hiframes.pd_dataframe_ext'): # lhs, data arrays, and index should have the same distribution df_typ = self.typemap[lhs] n_cols = len(df_typ.columns) @@ -424,30 +424,30 @@ def _analyze_call(self, lhs, rhs, func_var, args, array_dists): array_dists[rhs.args[i].name] = new_dist return - if fdef == ('get_dataframe_data', 'hpat.hiframes.pd_dataframe_ext'): + if fdef == ('get_dataframe_data', 'sdc.hiframes.pd_dataframe_ext'): self._meet_array_dists(lhs, rhs.args[0].name, array_dists) return - if fdef == ('compute_split_view', 'hpat.hiframes.split_impl'): + if fdef == ('compute_split_view', 'sdc.hiframes.split_impl'): self._meet_array_dists(lhs, rhs.args[0].name, array_dists) return - if fdef == ('get_split_view_index', 'hpat.hiframes.split_impl'): + if fdef == ('get_split_view_index', 'sdc.hiframes.split_impl'): # just used in str.get() implementation for now so we know it is # parallel # TODO: handle index similar to getitem to support more cases return - if fdef == ('get_split_view_data_ptr', 'hpat.hiframes.split_impl'): + if fdef == ('get_split_view_data_ptr', 'sdc.hiframes.split_impl'): return - if fdef == ('setitem_str_arr_ptr', 'hpat.str_arr_ext'): + if fdef == ('setitem_str_arr_ptr', 'sdc.str_arr_ext'): return - if fdef == ('num_total_chars', 'hpat.str_arr_ext'): + if fdef == ('num_total_chars', 'sdc.str_arr_ext'): return - if fdef == ('_series_dropna_str_alloc_impl_inner', 'hpat.hiframes.series_kernels'): + if fdef == ('_series_dropna_str_alloc_impl_inner', 'sdc.hiframes.series_kernels'): if lhs not in array_dists: array_dists[lhs] = Distribution.OneD_Var in_dist = array_dists[rhs.args[0].name] @@ -459,41 +459,41 @@ def _analyze_call(self, lhs, rhs, func_var, args, array_dists): array_dists[rhs.args[0].name] = out_dist return - if (fdef == ('copy_non_null_offsets', 'hpat.str_arr_ext') - or fdef == ('copy_data', 'hpat.str_arr_ext')): + if (fdef == ('copy_non_null_offsets', 'sdc.str_arr_ext') + or fdef == ('copy_data', 'sdc.str_arr_ext')): out_arrname = 
rhs.args[0].name in_arrname = rhs.args[1].name self._meet_array_dists(out_arrname, in_arrname, array_dists) return - if fdef == ('str_arr_item_to_numeric', 'hpat.str_arr_ext'): + if fdef == ('str_arr_item_to_numeric', 'sdc.str_arr_ext'): out_arrname = rhs.args[0].name in_arrname = rhs.args[2].name self._meet_array_dists(out_arrname, in_arrname, array_dists) return # np.fromfile() - if fdef == ('file_read', 'hpat.io.np_io'): + if fdef == ('file_read', 'sdc.io.np_io'): return - if hpat.config._has_ros and fdef == ('read_ros_images_inner', 'hpat.ros'): + if sdc.config._has_ros and fdef == ('read_ros_images_inner', 'sdc.ros'): return - if hpat.config._has_pyarrow and fdef == ('read_parquet', 'hpat.io.parquet_pio'): + if sdc.config._has_pyarrow and fdef == ('read_parquet', 'sdc.io.parquet_pio'): return - if hpat.config._has_pyarrow and fdef == ('read_parquet_str', 'hpat.io.parquet_pio'): + if sdc.config._has_pyarrow and fdef == ('read_parquet_str', 'sdc.io.parquet_pio'): # string read creates array in output if lhs not in array_dists: array_dists[lhs] = Distribution.OneD return # TODO: fix "numba.extending" in function def - if hpat.config._has_xenon and fdef == ('read_xenon_col', 'numba.extending'): + if sdc.config._has_xenon and fdef == ('read_xenon_col', 'numba.extending'): array_dists[args[4].name] = Distribution.REP return - if hpat.config._has_xenon and fdef == ('read_xenon_str', 'numba.extending'): + if sdc.config._has_xenon and fdef == ('read_xenon_str', 'numba.extending'): array_dists[args[4].name] = Distribution.REP # string read creates array in output if lhs not in array_dists: @@ -501,20 +501,20 @@ def _analyze_call(self, lhs, rhs, func_var, args, array_dists): return if func_name == 'train' and isinstance(func_mod, ir.Var): - if self.typemap[func_mod.name] == hpat.ml.svc.svc_type: + if self.typemap[func_mod.name] == sdc.ml.svc.svc_type: self._meet_array_dists( args[0].name, args[1].name, array_dists, Distribution.Thread) return - if 
self.typemap[func_mod.name] == hpat.ml.naive_bayes.mnb_type: + if self.typemap[func_mod.name] == sdc.ml.naive_bayes.mnb_type: self._meet_array_dists(args[0].name, args[1].name, array_dists) return if func_name == 'predict' and isinstance(func_mod, ir.Var): - if self.typemap[func_mod.name] == hpat.ml.svc.svc_type: + if self.typemap[func_mod.name] == sdc.ml.svc.svc_type: self._meet_array_dists( lhs, args[0].name, array_dists, Distribution.Thread) return - if self.typemap[func_mod.name] == hpat.ml.naive_bayes.mnb_type: + if self.typemap[func_mod.name] == sdc.ml.naive_bayes.mnb_type: self._meet_array_dists(lhs, args[0].name, array_dists) return @@ -639,7 +639,7 @@ def _analyze_call_df(self, lhs, arr, func_name, args, array_dists): def _analyze_call_hpat_dist(self, lhs, func_name, args, array_dists): """analyze distributions of hpat distributed functions - (hpat.distributed_api.func_name) + (sdc.distributed_api.func_name) """ if func_name == 'local_len': return @@ -674,7 +674,7 @@ def _analyze_call_hpat_dist(self, lhs, func_name, args, array_dists): return # set REP if not found - self._analyze_call_set_REP(lhs, args, array_dists, 'hpat.distributed_api.' + func_name) + self._analyze_call_set_REP(lhs, args, array_dists, 'sdc.distributed_api.' 
+ func_name) def _analyze_call_np_concatenate(self, lhs, args, array_dists): assert len(args) == 1 @@ -951,8 +951,8 @@ def _gen_rebalances(self, rebalance_arrs, blocks): nodes = [inst] def f(in_arr): # pragma: no cover - out_a = hpat.distributed_api.rebalance_array(in_arr) - f_block = compile_to_numba_ir(f, {'hpat': hpat}, self.typingctx, + out_a = sdc.distributed_api.rebalance_array(in_arr) + f_block = compile_to_numba_ir(f, {'sdc': sdc}, self.typingctx, (self.typemap[tmp_arr.name],), self.typemap, self.calltypes).blocks.popitem()[1] replace_arg_nodes(f_block, [tmp_arr]) @@ -1077,11 +1077,11 @@ def _get_array_accesses(blocks, func_ir, typemap, accesses=None): if isinstance(rhs, ir.Expr) and rhs.op == 'call': fdef = guard(find_callname, func_ir, rhs, typemap) if fdef is not None: - if fdef == ('get_split_view_index', 'hpat.hiframes.split_impl'): + if fdef == ('get_split_view_index', 'sdc.hiframes.split_impl'): accesses.add((rhs.args[0].name, rhs.args[1].name)) - if fdef == ('setitem_str_arr_ptr', 'hpat.str_arr_ext'): + if fdef == ('setitem_str_arr_ptr', 'sdc.str_arr_ext'): accesses.add((rhs.args[0].name, rhs.args[1].name)) - if fdef == ('str_arr_item_to_numeric', 'hpat.str_arr_ext'): + if fdef == ('str_arr_item_to_numeric', 'sdc.str_arr_ext'): accesses.add((rhs.args[0].name, rhs.args[1].name)) accesses.add((rhs.args[2].name, rhs.args[3].name)) for T, f in array_accesses_extensions.items(): diff --git a/hpat/distributed_api.py b/sdc/distributed_api.py similarity index 93% rename from hpat/distributed_api.py rename to sdc/distributed_api.py index 7b26a82e5..deb51328d 100644 --- a/hpat/distributed_api.py +++ b/sdc/distributed_api.py @@ -42,14 +42,14 @@ from numba.typing import signature from numba.typing.templates import infer_global, AbstractTemplate, infer -import hpat -from hpat import config -from hpat.str_arr_ext import (string_array_type, num_total_chars, StringArray, +import sdc +from sdc import config +from sdc.str_arr_ext import (string_array_type, 
num_total_chars, StringArray, pre_alloc_string_array, get_offset_ptr, get_data_ptr, convert_len_arr_to_offset) -from hpat.utils import (debug_prints, empty_like_type, _numba_to_c_type_map, unliteral_all) +from sdc.utils import (debug_prints, empty_like_type, _numba_to_c_type_map, unliteral_all) -if hpat.config.config_transport_mpi: +if sdc.config.config_transport_mpi: from . import transport_mpi as transport else: from . import transport_seq as transport @@ -86,8 +86,8 @@ def get_type_enum(arr): @overload(get_type_enum) def get_type_enum_overload(arr): dtype = arr.dtype - if isinstance(dtype, hpat.hiframes.pd_categorical_ext.PDCategoricalDtype): - dtype = hpat.hiframes.pd_categorical_ext.get_categories_int_type(dtype) + if isinstance(dtype, sdc.hiframes.pd_categorical_ext.PDCategoricalDtype): + dtype = sdc.hiframes.pd_categorical_ext.get_categories_int_type(dtype) typ_val = _numba_to_c_type_map[dtype] return lambda arr: np.int32(typ_val) @@ -145,8 +145,8 @@ def gather_scalar_overload(val): func_text = ( "def gather_scalar_impl(val):\n" - " n_pes = hpat.distributed_api.get_size()\n" - " rank = hpat.distributed_api.get_rank()\n" + " n_pes = sdc.distributed_api.get_size()\n" + " rank = sdc.distributed_api.get_rank()\n" " send = np.full(1, val, np.{})\n" " res_size = n_pes if rank == {} else 0\n" " res = np.empty(res_size, np.{})\n" @@ -154,7 +154,7 @@ def gather_scalar_overload(val): " return res\n").format(val, MPI_ROOT, val, typ_val) loc_vars = {} - exec(func_text, {'hpat': hpat, 'np': np, 'c_gather_scalar': c_gather_scalar}, loc_vars) + exec(func_text, {'sdc': sdc, 'np': np, 'c_gather_scalar': c_gather_scalar}, loc_vars) gather_impl = loc_vars['gather_scalar_impl'] return gather_impl @@ -184,7 +184,7 @@ def gatherv_overload(data): typ_val = _numba_to_c_type_map[data.dtype] def gatherv_impl(data): - rank = hpat.distributed_api.get_rank() + rank = sdc.distributed_api.get_rank() n_loc = len(data) recv_counts = gather_scalar(np.int32(n_loc)) n_total = recv_counts.sum() 
@@ -192,7 +192,7 @@ def gatherv_impl(data): # displacements displs = np.empty(1, np.int32) if rank == MPI_ROOT: - displs = hpat.hiframes.join.calc_disp(recv_counts) + displs = sdc.hiframes.join.calc_disp(recv_counts) c_gatherv( data.ctypes, np.int32(n_loc), @@ -209,7 +209,7 @@ def gatherv_impl(data): char_typ_enum = np.int32(_numba_to_c_type_map[types.uint8]) def gatherv_str_arr_impl(data): - rank = hpat.distributed_api.get_rank() + rank = sdc.distributed_api.get_rank() n_loc = len(data) n_all_chars = num_total_chars(data) @@ -233,8 +233,8 @@ def gatherv_str_arr_impl(data): if rank == MPI_ROOT: all_data = pre_alloc_string_array(n_total, n_total_char) - displs = hpat.hiframes.join.calc_disp(recv_counts) - displs_char = hpat.hiframes.join.calc_disp(recv_counts_char) + displs = sdc.hiframes.join.calc_disp(recv_counts) + displs_char = sdc.hiframes.join.calc_disp(recv_counts_char) offset_ptr = get_offset_ptr(all_data) data_ptr = get_data_ptr(all_data) @@ -281,7 +281,7 @@ def bcast_impl(data): char_typ_enum = np.int32(_numba_to_c_type_map[types.uint8]) def bcast_str_impl(data): - rank = hpat.distributed_api.get_rank() + rank = sdc.distributed_api.get_rank() n_loc = len(data) n_all_chars = num_total_chars(data) assert n_loc < INT_MAX @@ -328,7 +328,7 @@ def bcast_scalar_overload(val): " return send[0]\n").format(val, typ_val) loc_vars = {} - exec(func_text, {'hpat': hpat, 'np': np, 'c_bcast': c_bcast}, loc_vars) + exec(func_text, {'sdc': sdc, 'np': np, 'c_bcast': c_bcast}, loc_vars) bcast_scalar_impl = loc_vars['bcast_scalar_impl'] return bcast_scalar_impl @@ -343,7 +343,7 @@ def prealloc_str_for_bcast(arr): def prealloc_str_for_bcast_overload(arr): if arr == string_array_type: def prealloc_impl(arr): - rank = hpat.distributed_api.get_rank() + rank = sdc.distributed_api.get_rank() n_loc = bcast_scalar(len(arr)) n_all_char = bcast_scalar(np.int64(num_total_chars(arr))) if rank != MPI_ROOT: @@ -380,7 +380,7 @@ def const_slice_getitem_overload(arr, slice_index, start, 
count): reduce_op = Reduce_Type.Sum.value def getitem_str_impl(arr, slice_index, start, count): - rank = hpat.distributed_api.get_rank() + rank = sdc.distributed_api.get_rank() k = slice_index.stop # get total characters for allocation @@ -395,8 +395,8 @@ def getitem_str_impl(arr, slice_index, start, count): # get the total number of chars in our array, then gather all arrays into one # and compute total number of chars in all arrays n_chars = num_total_chars(my_arr) - my_arr = hpat.distributed_api.gatherv(my_arr) - n_chars = hpat.distributed_api.dist_reduce(n_chars, np.int32(reduce_op)) + my_arr = sdc.distributed_api.gatherv(my_arr) + n_chars = sdc.distributed_api.dist_reduce(n_chars, np.int32(reduce_op)) if rank != 0: out_arr = pre_alloc_string_array(k, n_chars) @@ -404,13 +404,13 @@ def getitem_str_impl(arr, slice_index, start, count): out_arr = my_arr # actual communication - hpat.distributed_api.bcast(out_arr) + sdc.distributed_api.bcast(out_arr) return out_arr return getitem_str_impl def getitem_impl(arr, slice_index, start, count): - rank = hpat.distributed_api.get_rank() + rank = sdc.distributed_api.get_rank() k = slice_index.stop out_arr = np.empty(k, arr.dtype) @@ -422,13 +422,13 @@ def getitem_impl(arr, slice_index, start, count): my_arr = arr[:0] # gather all subsets from all processors - my_arr = hpat.distributed_api.gatherv(my_arr) + my_arr = sdc.distributed_api.gatherv(my_arr) if rank == 0: out_arr = my_arr # actual communication - hpat.distributed_api.bcast(out_arr) + sdc.distributed_api.bcast(out_arr) return out_arr return getitem_impl diff --git a/hpat/distributed_lower.py b/sdc/distributed_lower.py similarity index 97% rename from hpat/distributed_lower.py rename to sdc/distributed_lower.py index 19c37c422..0d38e47da 100644 --- a/hpat/distributed_lower.py +++ b/sdc/distributed_lower.py @@ -42,13 +42,13 @@ from numba.typing.templates import infer_global, AbstractTemplate from numba.typing.builtins import IndexValueType -import hpat -from hpat 
import distributed_api -from hpat.utils import _numba_to_c_type_map -from hpat.distributed_api import mpi_req_numba_type, ReqArrayType, req_array_type +import sdc +from sdc import distributed_api +from sdc.utils import _numba_to_c_type_map +from sdc.distributed_api import mpi_req_numba_type, ReqArrayType, req_array_type from . import hdist -if hpat.config.config_transport_mpi: +if sdc.config.config_transport_mpi: from . import transport_mpi as transport else: from . import transport_seq as transport @@ -346,8 +346,8 @@ def lower_dist_rebalance_array_parallel(context, builder, sig, args): alloc_text = "np.empty(({}), in_arr.dtype)".format(shape_tup) func_text = """def f(in_arr, count): - n_pes = hpat.distributed_api.get_size() - my_rank = hpat.distributed_api.get_rank() + n_pes = sdc.distributed_api.get_size() + my_rank = sdc.distributed_api.get_rank() out_arr = {} # copy old data old_len = len(in_arr) @@ -358,10 +358,10 @@ def lower_dist_rebalance_array_parallel(context, builder, sig, args): # get diff data for all procs my_diff = old_len - count all_diffs = np.empty(n_pes, np.int64) - hpat.distributed_api.allgather(all_diffs, my_diff) + sdc.distributed_api.allgather(all_diffs, my_diff) # alloc comm requests comm_req_ind = 0 - comm_reqs = hpat.distributed_api.comm_req_alloc(n_pes) + comm_reqs = sdc.distributed_api.comm_req_alloc(n_pes) req_ind = 0 # for each potential receiver for i in range(n_pes): @@ -375,14 +375,14 @@ def lower_dist_rebalance_array_parallel(context, builder, sig, args): # if I'm receiver if my_rank == i: buff = out_arr[out_ind:(out_ind+send_size)] - comm_reqs[comm_req_ind] = hpat.distributed_api.irecv( + comm_reqs[comm_req_ind] = sdc.distributed_api.irecv( buff, np.int32(buff.size), np.int32(j), np.int32(9)) comm_req_ind += 1 out_ind += send_size # if I'm sender if my_rank == j: buff = np.ascontiguousarray(in_arr[out_ind:(out_ind+send_size)]) - comm_reqs[comm_req_ind] = hpat.distributed_api.isend( + comm_reqs[comm_req_ind] = 
sdc.distributed_api.isend( buff, np.int32(buff.size), np.int32(i), np.int32(9)) comm_req_ind += 1 out_ind += send_size @@ -391,13 +391,13 @@ def lower_dist_rebalance_array_parallel(context, builder, sig, args): all_diffs[j] -= send_size # if receiver is done, stop sender search if all_diffs[i] == 0: break - hpat.distributed_api.waitall(np.int32(comm_req_ind), comm_reqs) - hpat.distributed_api.comm_req_dealloc(comm_reqs) + sdc.distributed_api.waitall(np.int32(comm_req_ind), comm_reqs) + sdc.distributed_api.comm_req_dealloc(comm_reqs) return out_arr """.format(alloc_text) loc = {} - exec(func_text, {'hpat': hpat, 'np': np}, loc) + exec(func_text, {'sdc': sdc, 'np': np}, loc) rebalance_impl = loc['f'] res = context.compile_internal(builder, rebalance_impl, sig, args) diff --git a/hpat/hiframes/__init__.py b/sdc/hiframes/__init__.py similarity index 100% rename from hpat/hiframes/__init__.py rename to sdc/hiframes/__init__.py diff --git a/hpat/hiframes/aggregate.py b/sdc/hiframes/aggregate.py similarity index 96% rename from hpat/hiframes/aggregate.py rename to sdc/hiframes/aggregate.py index 5ea91289e..8b583e0c4 100644 --- a/hpat/hiframes/aggregate.py +++ b/sdc/hiframes/aggregate.py @@ -46,20 +46,20 @@ from numba.typing import signature from numba.typing.templates import infer_global, AbstractTemplate from numba.extending import overload, lower_builtin -import hpat -from hpat.utils import (is_call_assign, is_var_assign, is_assign, debug_prints, +import sdc +from sdc.utils import (is_call_assign, is_var_assign, is_assign, debug_prints, alloc_arr_tup, empty_like_type) -from hpat import distributed, distributed_analysis -from hpat.distributed_analysis import Distribution -from hpat.utils import _numba_to_c_type_map, unliteral_all -from hpat.str_ext import string_type -from hpat.set_ext import num_total_chars_set_string, build_set -from hpat.str_arr_ext import (string_array_type, pre_alloc_string_array, +from sdc import distributed, distributed_analysis +from 
sdc.distributed_analysis import Distribution +from sdc.utils import _numba_to_c_type_map, unliteral_all +from sdc.str_ext import string_type +from sdc.set_ext import num_total_chars_set_string, build_set +from sdc.str_arr_ext import (string_array_type, pre_alloc_string_array, get_offset_ptr, get_data_ptr) -from hpat.hiframes.join import write_send_buff -from hpat.timsort import getitem_arr_tup -from hpat.shuffle_utils import (getitem_arr_tup_single, val_to_tup, alltoallv, +from sdc.hiframes.join import write_send_buff +from sdc.timsort import getitem_arr_tup +from sdc.shuffle_utils import (getitem_arr_tup_single, val_to_tup, alltoallv, alltoallv_tup, finalize_shuffle_meta, update_shuffle_meta, alloc_pre_shuffle_metadata, _get_keys_tup, _get_data_tup) @@ -73,7 +73,7 @@ def get_agg_func(func_ir, func_name, rhs): - from hpat.hiframes.series_kernels import series_replace_funcs + from sdc.hiframes.series_kernels import series_replace_funcs if func_name == 'var': return _column_var_impl_linear if func_name == 'std': @@ -123,7 +123,7 @@ def _var_combine(ssqdm_a, mean_a, nobs_a, ssqdm_b, mean_b, nobs_b): # pragma: n M2 = ssqdm_a + ssqdm_b + delta * delta * nobs_a * nobs_b / nobs return M2, mean_x, nobs -# XXX: njit doesn't work when hpat.jit() is used for agg_func in hiframes +# XXX: njit doesn't work when sdc.jit() is used for agg_func in hiframes # @numba.njit @@ -150,8 +150,8 @@ def _column_var_impl_linear(A): # pragma: no cover ssqdm_x = 0.0 N = len(A) for i in numba.parfor.internal_prange(N): - hpat.hiframes.aggregate.__special_combine( - ssqdm_x, mean_x, nobs, hpat.hiframes.aggregate._var_combine) + sdc.hiframes.aggregate.__special_combine( + ssqdm_x, mean_x, nobs, sdc.hiframes.aggregate._var_combine) val = A[i] if not np.isnan(val): nobs += 1 @@ -161,7 +161,7 @@ def _column_var_impl_linear(A): # pragma: no cover # ssqdm_x += ((nobs - 1) * delta ** 2) / nobs delta2 = val - mean_x ssqdm_x += delta * delta2 - return hpat.hiframes.rolling.calc_var(2, nobs, mean_x, 
ssqdm_x) + return sdc.hiframes.rolling.calc_var(2, nobs, mean_x, ssqdm_x) # TODO: avoid code duplication @@ -172,8 +172,8 @@ def _column_std_impl_linear(A): # pragma: no cover ssqdm_x = 0.0 N = len(A) for i in numba.parfor.internal_prange(N): - hpat.hiframes.aggregate.__special_combine( - ssqdm_x, mean_x, nobs, hpat.hiframes.aggregate._var_combine) + sdc.hiframes.aggregate.__special_combine( + ssqdm_x, mean_x, nobs, sdc.hiframes.aggregate._var_combine) val = A[i] if not np.isnan(val): nobs += 1 @@ -183,7 +183,7 @@ def _column_std_impl_linear(A): # pragma: no cover # ssqdm_x += ((nobs - 1) * delta ** 2) / nobs delta2 = val - mean_x ssqdm_x += delta * delta2 - v = hpat.hiframes.rolling.calc_var(2, nobs, mean_x, ssqdm_x) + v = sdc.hiframes.rolling.calc_var(2, nobs, mean_x, ssqdm_x) return v**0.5 @@ -287,7 +287,7 @@ def aggregate_usedefs(aggregate_node, use_set=None, def_set=None): def remove_dead_aggregate(aggregate_node, lives, arg_aliases, alias_map, func_ir, typemap): # - if not hpat.hiframes.api.enable_hiframes_remove_dead: + if not sdc.hiframes.api.enable_hiframes_remove_dead: return aggregate_node dead_cols = [] @@ -606,7 +606,7 @@ def agg_distributed_run(agg_node, array_dists, typemap, calltypes, typingctx, ta agg_node.df_in_vars.keys(), agg_node.df_out_vars.keys(), parallel) f_block = compile_to_numba_ir(top_level_func, - {'hpat': hpat, 'np': np, + {'sdc': sdc, 'np': np, 'agg_seq_iter': agg_seq_iter, 'parallel_agg': parallel_agg, '__update_redvars': agg_func_struct.update_all_func, @@ -654,7 +654,7 @@ def agg_distributed_run(agg_node, array_dists, typemap, calltypes, typingctx, ta def parallel_agg(key_arrs, data_redvar_dummy, out_dummy_tup, data_in, init_vals, __update_redvars, __combine_redvars, __eval_res, return_key, pivot_arr): # pragma: no cover # alloc shuffle meta - n_pes = hpat.distributed_api.get_size() + n_pes = sdc.distributed_api.get_size() pre_shuffle_meta = alloc_pre_shuffle_metadata(key_arrs, data_redvar_dummy, n_pes, False) # calc send/recv 
counts @@ -693,8 +693,8 @@ def agg_parallel_local_iter(key_arrs, data_in, shuffle_meta, data_redvar_dummy, # _init_val_1 = np.int64(0) # redvar_1_arr = np.full(n_uniq_keys, _init_val_1, np.int64) # out_key = np.empty(n_uniq_keys, np.float64) - n_pes = hpat.distributed_api.get_size() - # hpat.dict_ext.init_dict_float64_int64() + n_pes = sdc.distributed_api.get_size() + # sdc.dict_ext.init_dict_float64_int64() # key_write_map = get_key_dict(key_arrs[0]) key_write_map, byte_v = get_key_dict(key_arrs) @@ -714,7 +714,7 @@ def agg_parallel_local_iter(key_arrs, data_in, shuffle_meta, data_redvar_dummy, w_ind = key_write_map[k] __update_redvars(redvar_arrs, data_in, w_ind, i, pivot_arr) #redvar_arrs[0][w_ind], redvar_arrs[1][w_ind] = __update_redvars(redvar_arrs[0][w_ind], redvar_arrs[1][w_ind], data_in[0][i]) - hpat.dict_ext.byte_vec_free(byte_v) + sdc.dict_ext.byte_vec_free(byte_v) return @@ -748,7 +748,7 @@ def agg_parallel_combine_iter(key_arrs, reduce_recvs, out_dummy_tup, init_vals, for j in range(n_uniq_keys): __eval_res(local_redvars, out_arrs, j) - hpat.dict_ext.byte_vec_free(byte_v) + sdc.dict_ext.byte_vec_free(byte_v) return out_arrs @@ -781,7 +781,7 @@ def agg_seq_iter(key_arrs, redvar_dummy_tup, out_dummy_tup, data_in, init_vals, for j in range(n_uniq_keys): __eval_res(local_redvars, out_arrs, j) - hpat.dict_ext.byte_vec_free(byte_v) + sdc.dict_ext.byte_vec_free(byte_v) return out_arrs @@ -823,18 +823,18 @@ def get_key_dict_overload(arr): n_bytes += context.get_abi_sizeof(context.get_data_type(t.dtype)) def _impl(arr): - b_v = hpat.dict_ext.byte_vec_init(n_bytes, 0) - b_dict = hpat.dict_ext.dict_byte_vec_int64_init() + b_v = sdc.dict_ext.byte_vec_init(n_bytes, 0) + b_dict = sdc.dict_ext.dict_byte_vec_int64_init() return b_dict, b_v return _impl # regular scalar keys dtype = arr.types[0].dtype func_text = "def k_dict_impl(arr):\n" - func_text += " b_v = hpat.dict_ext.byte_vec_init(1, 0)\n" - func_text += " return hpat.dict_ext.dict_{}_int64_init(), 
b_v\n".format(dtype) + func_text += " b_v = sdc.dict_ext.byte_vec_init(1, 0)\n" + func_text += " return sdc.dict_ext.dict_{}_int64_init(), b_v\n".format(dtype) loc_vars = {} - exec(func_text, {'hpat': hpat}, loc_vars) + exec(func_text, {'sdc': sdc}, loc_vars) k_dict_impl = loc_vars['k_dict_impl'] return k_dict_impl @@ -852,12 +852,12 @@ def _getitem_keys_overload(arrs, ind, b_v): for i, t in enumerate(arrs.types): n_bytes = context.get_abi_sizeof(context.get_data_type(t.dtype)) func_text += " arr_ptr = arrs[{}].ctypes.data + ind * {}\n".format(i, n_bytes) - func_text += " hpat.dict_ext.byte_vec_set(b_v, {}, arr_ptr, {})\n".format(offset, n_bytes) + func_text += " sdc.dict_ext.byte_vec_set(b_v, {}, arr_ptr, {})\n".format(offset, n_bytes) offset += n_bytes func_text += " return b_v\n" loc_vars = {} - exec(func_text, {'hpat': hpat}, loc_vars) + exec(func_text, {'sdc': sdc}, loc_vars) getitem_impl = loc_vars['getitem_impl'] return getitem_impl @@ -897,7 +897,7 @@ def get_key_set(arr): # pragma: no cover def get_key_set_overload(arr): if arr == string_array_type or (isinstance(arr, types.BaseTuple) and len(arr.types) == 1 and arr.types[0] == string_array_type): - return lambda arr: hpat.set_ext.init_set_string() + return lambda arr: sdc.set_ext.init_set_string() if isinstance(arr, types.BaseTuple): def get_set_tup(arr): @@ -1080,7 +1080,7 @@ def compile_to_optimized_ir(func, arg_typs, typingctx): # XXX are outside function's globals needed? 
code = func.code if hasattr(func, 'code') else func.__code__ - state.func_ir = get_ir_of_code({'numba': numba, 'np': np, 'hpat': hpat}, code) + state.func_ir = get_ir_of_code({'numba': numba, 'np': np, 'sdc': sdc}, code) state.typingctx = typingctx state.args = arg_typs @@ -1097,7 +1097,7 @@ def compile_to_optimized_ir(func, arg_typs, typingctx): assert state.func_ir.arg_count == 1, "agg function should have one input" input_name = state.func_ir.arg_names[0] - df_pass = hpat.hiframes.hiframes_untyped.HiFramesPass() + df_pass = sdc.hiframes.hiframes_untyped.HiFramesPass() df_pass.run_pass(state) remove_dead(state.func_ir.blocks, state.func_ir.arg_names, state.func_ir) state.typemap, return_type, state.calltypes = typed_passes.type_inference_stage( @@ -1110,7 +1110,7 @@ def compile_to_optimized_ir(func, arg_typs, typingctx): preparfor_pass = numba.parfor.PreParforPass(state.func_ir, state.typemap, state.calltypes, state.typingctx, options) preparfor_pass.run() state.func_ir._definitions = build_definitions(state.func_ir.blocks) - df_t_pass = hpat.hiframes.hiframes_typed.HiFramesTypedPass() + df_t_pass = sdc.hiframes.hiframes_typed.HiFramesTypedPass() df_t_pass.run_pass(state) numba.rewrites.rewrite_registry.apply('after-inference', state) parfor_pass = numba.parfor.ParforPass(state.func_ir, state.typemap, @@ -1505,7 +1505,7 @@ def gen_eval_func(f_ir, eval_nodes, reduce_vars, var_types, pm, typingctx, targe agg_eval = loc_vars['agg_eval'] arg_typs = tuple(var_types) - f_ir = compile_to_numba_ir(agg_eval, {'numba': numba, 'hpat': hpat, 'np': np, '_zero': zero}, # TODO: add outside globals + f_ir = compile_to_numba_ir(agg_eval, {'numba': numba, 'sdc': sdc, 'np': np, '_zero': zero}, # TODO: add outside globals typingctx, arg_typs, pm.typemap, pm.calltypes) @@ -1546,7 +1546,7 @@ def gen_combine_func(f_ir, parfor, redvars, var_to_redvar, var_types, arr_var, bl = parfor.loop_body[label] for stmt in bl.body: if is_call_assign(stmt) and (guard(find_callname, f_ir, stmt.value) 
- == ('__special_combine', 'hpat.hiframes.aggregate')): + == ('__special_combine', 'sdc.hiframes.aggregate')): args = stmt.value.args l_argnames = [] r_argnames = [] @@ -1562,8 +1562,8 @@ def gen_combine_func(f_ir, parfor, redvars, var_to_redvar, var_types, arr_var, sp_func = guard(find_callname, f_ir, dummy_call) # XXX: only var supported for now # TODO: support general functions - assert sp_func == ('_var_combine', 'hpat.hiframes.aggregate') - sp_func = hpat.hiframes.aggregate._var_combine + assert sp_func == ('_var_combine', 'sdc.hiframes.aggregate') + sp_func = sdc.hiframes.aggregate._var_combine special_combines[comb_name] = sp_func # reduction variables @@ -1591,7 +1591,7 @@ def gen_combine_func(f_ir, parfor, redvars, var_to_redvar, var_types, arr_var, # reduction variable types for new input and existing values arg_typs = tuple(2 * var_types) - glbs = {'numba': numba, 'hpat': hpat, 'np': np} + glbs = {'numba': numba, 'sdc': sdc, 'np': np} glbs.update(special_combines) f_ir = compile_to_numba_ir(agg_combine, glbs, # TODO: add outside globals typingctx, arg_typs, @@ -1650,11 +1650,11 @@ def gen_update_func(parfor, redvars, var_to_redvar, var_types, arr_var, continue if is_getitem(stmt) and stmt.value.value.name == arr_var.name: stmt.value = in_vars[0] - # XXX replace hpat.hiframes.api.isna(A, i) for now + # XXX replace sdc.hiframes.api.isna(A, i) for now # TODO: handle actual NA # for test_agg_seq_count_str test if (is_call_assign(stmt) and guard(find_callname, pm.func_ir, stmt.value) - == ('isna', 'hpat.hiframes.api') + == ('isna', 'sdc.hiframes.api') and stmt.value.args[0].name == arr_var.name): stmt.value = ir.Const(False, stmt.target.loc) # store reduction variables diff --git a/hpat/hiframes/api.py b/sdc/hiframes/api.py similarity index 94% rename from hpat/hiframes/api.py rename to sdc/hiframes/api.py index 8c4495d61..253c5fe74 100644 --- a/hpat/hiframes/api.py +++ b/sdc/hiframes/api.py @@ -52,33 +52,33 @@ RefType) from numba.targets.arrayobj import 
make_array, _getitem_array1d -import hpat -from hpat.utils import _numba_to_c_type_map, unliteral_all -from hpat.str_ext import string_type, list_string_array_type -from hpat.set_ext import build_set -from hpat.str_arr_ext import ( +import sdc +from sdc.utils import _numba_to_c_type_map, unliteral_all +from sdc.str_ext import string_type, list_string_array_type +from sdc.set_ext import build_set +from sdc.str_arr_ext import ( StringArrayType, string_array_type, is_str_arr_typ, num_total_chars, append_string_array_to) -from hpat.hiframes.pd_timestamp_ext import (pandas_timestamp_type, datetime_date_type, set_df_datetime_date_lower) -from hpat.hiframes.pd_series_ext import ( +from sdc.hiframes.pd_timestamp_ext import (pandas_timestamp_type, datetime_date_type, set_df_datetime_date_lower) +from sdc.hiframes.pd_series_ext import ( SeriesType, is_str_series_typ, if_arr_to_series_type, series_to_array_type, if_series_to_array_type, is_dt64_series_typ) -from hpat.hiframes.pd_index_ext import DatetimeIndexType, TimedeltaIndexType -from hpat.hiframes.sort import ( +from sdc.hiframes.pd_index_ext import DatetimeIndexType, TimedeltaIndexType +from sdc.hiframes.sort import ( alltoallv, alltoallv_tup, finalize_shuffle_meta, update_shuffle_meta, alloc_pre_shuffle_metadata) -from hpat.hiframes.join import write_send_buff -from hpat.hiframes.split_impl import string_array_split_view_type +from sdc.hiframes.join import write_send_buff +from sdc.hiframes.split_impl import string_array_split_view_type from numba.errors import TypingError # XXX: used in agg func output to avoid mutating filter, agg, join, etc. @@ -87,7 +87,7 @@ # quantile imports? -if hpat.config.config_transport_mpi: +if sdc.config.config_transport_mpi: from .. import transport_mpi as transport else: from .. 
import transport_seq as transport @@ -184,7 +184,7 @@ def concat(arr_list): @numba.njit def nth_element(arr, k, parallel=False): res = np.empty(1, arr.dtype) - type_enum = hpat.distributed_api.get_type_enum(arr) + type_enum = sdc.distributed_api.get_type_enum(arr) if parallel: nth_parallel(res.ctypes, arr.ctypes, len(arr), k, type_enum) else: @@ -192,7 +192,7 @@ def nth_element(arr, k, parallel=False): return res[0] -sum_op = hpat.distributed_api.Reduce_Type.Sum.value +sum_op = sdc.distributed_api.Reduce_Type.Sum.value @numba.njit @@ -200,12 +200,12 @@ def median(arr, parallel=False): # similar to numpy/lib/function_base.py:_median # TODO: check return types, e.g. float32 -> float32 - if not hpat.config.config_transport_mpi: + if not sdc.config.config_transport_mpi: parallel = False n = len(arr) if parallel: - n = hpat.distributed_api.dist_reduce(n, np.int32(sum_op)) + n = sdc.distributed_api.dist_reduce(n, np.int32(sum_op)) k = n // 2 # odd length case @@ -255,17 +255,17 @@ def string_concat_impl(in_arrs): for A in in_arrs: arr = dummy_unbox_series(A) num_strs += len(arr) - num_chars += hpat.str_arr_ext.num_total_chars(arr) - out_arr = hpat.str_arr_ext.pre_alloc_string_array(num_strs, num_chars) + num_chars += sdc.str_arr_ext.num_total_chars(arr) + out_arr = sdc.str_arr_ext.pre_alloc_string_array(num_strs, num_chars) # copy data to output curr_str_ind = 0 curr_chars_ind = 0 for A in in_arrs: arr = dummy_unbox_series(A) - hpat.str_arr_ext.set_string_array_range( + sdc.str_arr_ext.set_string_array_range( out_arr, arr, curr_str_ind, curr_chars_ind) curr_str_ind += len(arr) - curr_chars_ind += hpat.str_arr_ext.num_total_chars(arr) + curr_chars_ind += sdc.str_arr_ext.num_total_chars(arr) return out_arr return string_concat_impl @@ -322,12 +322,12 @@ def lower_nunique_parallel(context, builder, sig, args): def nunique_overload_parallel(arr_typ): - sum_op = hpat.distributed_api.Reduce_Type.Sum.value + sum_op = sdc.distributed_api.Reduce_Type.Sum.value def nunique_par(A): 
- uniq_A = hpat.hiframes.api.unique_parallel(A) + uniq_A = sdc.hiframes.api.unique_parallel(A) loc_nuniq = len(uniq_A) - return hpat.distributed_api.dist_reduce(loc_nuniq, np.int32(sum_op)) + return sdc.distributed_api.dist_reduce(loc_nuniq, np.int32(sum_op)) return nunique_par @@ -362,7 +362,7 @@ def lower_unique(context, builder, sig, args): def unique_overload(arr_typ): # TODO: extend to other types like datetime? def unique_seq(A): - return hpat.utils.to_array(build_set(A)) + return sdc.utils.to_array(build_set(A)) return unique_seq @@ -378,10 +378,10 @@ def lower_unique_parallel(context, builder, sig, args): def unique_overload_parallel(arr_typ): def unique_par(A): - uniq_A = hpat.utils.to_array(build_set(A)) + uniq_A = sdc.utils.to_array(build_set(A)) key_arrs = (uniq_A,) - n_pes = hpat.distributed_api.get_size() + n_pes = sdc.distributed_api.get_size() pre_shuffle_meta = alloc_pre_shuffle_metadata(key_arrs, (), n_pes, False) # calc send/recv counts @@ -403,7 +403,7 @@ def unique_par(A): # shuffle out_arr, = alltoallv_tup(key_arrs, shuffle_meta) - return hpat.utils.to_array(build_set(out_arr)) + return sdc.utils.to_array(build_set(out_arr)) return unique_par @@ -423,14 +423,14 @@ def unique_par(A): # TODO: refactor with join @numba.njit def set_recv_counts_chars(key_arr): - n_pes = hpat.distributed_api.get_size() + n_pes = sdc.distributed_api.get_size() send_counts = np.zeros(n_pes, np.int32) recv_counts = np.empty(n_pes, np.int32) for i in range(len(key_arr)): str = key_arr[i] node_id = hash(str) % n_pes send_counts[node_id] += len(str) - hpat.distributed_api.alltoall(send_counts, recv_counts, 1) + sdc.distributed_api.alltoall(send_counts, recv_counts, 1) return send_counts, recv_counts @@ -544,7 +544,7 @@ def isna(arr, i): @overload(isna) def isna_overload(arr, i): if arr == string_array_type: - return lambda arr, i: hpat.str_arr_ext.str_arr_is_na(arr, i) + return lambda arr, i: sdc.str_arr_ext.str_arr_is_na(arr, i) # TODO: support NaN in list(list(str)) if 
arr == list_string_array_type: return lambda arr, i: False @@ -683,16 +683,16 @@ def nlargest_parallel(A, k, is_largest, cmp_f): # parallel algorithm: assuming k << len(A), just call nlargest on chunks # of A, gather the result and return the largest k # TODO: support cases where k is not too small - my_rank = hpat.distributed_api.get_rank() + my_rank = sdc.distributed_api.get_rank() local_res = nlargest(A, k, is_largest, cmp_f) - all_largest = hpat.distributed_api.gatherv(local_res) + all_largest = sdc.distributed_api.gatherv(local_res) # TODO: handle len(res) < k case if my_rank == MPI_ROOT: res = nlargest(all_largest, k, is_largest, cmp_f) else: res = np.empty(k, A.dtype) - hpat.distributed_api.bcast(res) + sdc.distributed_api.bcast(res) return res @@ -902,7 +902,7 @@ def set_df_col(df, cname, arr): @infer_global(set_df_col) class SetDfColInfer(AbstractTemplate): def generic(self, args, kws): - from hpat.hiframes.pd_dataframe_ext import DataFrameType + from sdc.hiframes.pd_dataframe_ext import DataFrameType assert not kws assert len(args) == 3 assert isinstance(args[1], types.Literal) @@ -1069,18 +1069,18 @@ def alias_ext_init_series(lhs_name, args, alias_map, arg_aliases): if hasattr(numba.ir_utils, 'alias_func_extensions'): - numba.ir_utils.alias_func_extensions[('init_series', 'hpat.hiframes.api')] = alias_ext_init_series - numba.ir_utils.alias_func_extensions[('get_series_data', 'hpat.hiframes.api')] = alias_ext_dummy_func - numba.ir_utils.alias_func_extensions[('get_series_index', 'hpat.hiframes.api')] = alias_ext_dummy_func - numba.ir_utils.alias_func_extensions[('init_datetime_index', 'hpat.hiframes.api')] = alias_ext_dummy_func - numba.ir_utils.alias_func_extensions[('get_index_data', 'hpat.hiframes.api')] = alias_ext_dummy_func - numba.ir_utils.alias_func_extensions[('dummy_unbox_series', 'hpat.hiframes.api')] = alias_ext_dummy_func + numba.ir_utils.alias_func_extensions[('init_series', 'sdc.hiframes.api')] = alias_ext_init_series + 
numba.ir_utils.alias_func_extensions[('get_series_data', 'sdc.hiframes.api')] = alias_ext_dummy_func + numba.ir_utils.alias_func_extensions[('get_series_index', 'sdc.hiframes.api')] = alias_ext_dummy_func + numba.ir_utils.alias_func_extensions[('init_datetime_index', 'sdc.hiframes.api')] = alias_ext_dummy_func + numba.ir_utils.alias_func_extensions[('get_index_data', 'sdc.hiframes.api')] = alias_ext_dummy_func + numba.ir_utils.alias_func_extensions[('dummy_unbox_series', 'sdc.hiframes.api')] = alias_ext_dummy_func numba.ir_utils.alias_func_extensions[('get_dataframe_data', - 'hpat.hiframes.pd_dataframe_ext')] = alias_ext_dummy_func + 'sdc.hiframes.pd_dataframe_ext')] = alias_ext_dummy_func # TODO: init_dataframe - numba.ir_utils.alias_func_extensions[('to_arr_from_series', 'hpat.hiframes.api')] = alias_ext_dummy_func - numba.ir_utils.alias_func_extensions[('ts_series_to_arr_typ', 'hpat.hiframes.api')] = alias_ext_dummy_func - numba.ir_utils.alias_func_extensions[('to_date_series_type', 'hpat.hiframes.api')] = alias_ext_dummy_func + numba.ir_utils.alias_func_extensions[('to_arr_from_series', 'sdc.hiframes.api')] = alias_ext_dummy_func + numba.ir_utils.alias_func_extensions[('ts_series_to_arr_typ', 'sdc.hiframes.api')] = alias_ext_dummy_func + numba.ir_utils.alias_func_extensions[('to_date_series_type', 'sdc.hiframes.api')] = alias_ext_dummy_func @numba.njit @@ -1188,7 +1188,7 @@ def generic(self, args, kws): if isinstance(column, types.List) and column.dtype == string_type: ret_typ = string_array_type if isinstance(column, DatetimeIndexType): - ret_typ = hpat.hiframes.pd_index_ext._dt_index_data_typ + ret_typ = sdc.hiframes.pd_index_ext._dt_index_data_typ if isinstance(column, SeriesType): ret_typ = column.data # TODO: add other types @@ -1216,14 +1216,14 @@ def fix_df_array_list_impl(column): # pragma: no cover # convert list of strings to string array if isinstance(column, types.List) and column.dtype == string_type: def fix_df_array_str_impl(column): # pragma: no 
cover - return hpat.str_arr_ext.StringArray(column) + return sdc.str_arr_ext.StringArray(column) return fix_df_array_str_impl if isinstance(column, DatetimeIndexType): - return lambda column: hpat.hiframes.api.get_index_data(column) + return lambda column: sdc.hiframes.api.get_index_data(column) if isinstance(column, SeriesType): - return lambda column: hpat.hiframes.api.get_series_data(column) + return lambda column: sdc.hiframes.api.get_series_data(column) # column is array if not list assert isinstance(column, (types.Array, StringArrayType, SeriesType)) @@ -1516,7 +1516,7 @@ def join_dummy(left_df, right_df, left_on, right_on, how): @infer_global(join_dummy) class JoinTyper(AbstractTemplate): def generic(self, args, kws): - from hpat.hiframes.pd_dataframe_ext import DataFrameType + from sdc.hiframes.pd_dataframe_ext import DataFrameType assert not kws left_df, right_df, left_on, right_on, how = args @@ -1568,12 +1568,12 @@ def drop_inplace(df): @overload(drop_inplace) def drop_inplace_overload(df, labels=None, axis=0, index=None, columns=None, level=None, inplace=False, errors='raise'): - from hpat.hiframes.pd_dataframe_ext import DataFrameType + from sdc.hiframes.pd_dataframe_ext import DataFrameType assert isinstance(df, DataFrameType) # TODO: support recovery when object is not df def _impl(df, labels=None, axis=0, index=None, columns=None, level=None, inplace=False, errors='raise'): - new_df = hpat.hiframes.pd_dataframe_ext.drop_dummy( + new_df = sdc.hiframes.pd_dataframe_ext.drop_dummy( df, labels, axis, columns, inplace) return new_df, None @@ -1601,7 +1601,7 @@ def list_str_arr_getitem_array(arr, ind): # TODO: convert to parfor in typed pass def list_str_arr_getitem_impl(arr, ind): n = ind.sum() - out_arr = hpat.str_ext.alloc_list_list_str(n) + out_arr = sdc.str_ext.alloc_list_list_str(n) j = 0 for i in range(len(ind)): if ind[i]: @@ -1730,7 +1730,7 @@ def iternext_itertuples(context, builder, sig, args, result): if arr_typ == 
types.Array(types.NPDatetime('ns'), 1, 'C'): getitem_sig = signature(pandas_timestamp_type, arr_typ, types.intp) val = context.compile_internal( - builder, lambda a, i: hpat.hiframes.pd_timestamp_ext.convert_datetime64_to_timestamp( + builder, lambda a, i: sdc.hiframes.pd_timestamp_ext.convert_datetime64_to_timestamp( np.int64( a[i])), getitem_sig, [ arr_ptr, index]) diff --git a/hpat/hiframes/boxing.py b/sdc/hiframes/boxing.py similarity index 97% rename from hpat/hiframes/boxing.py rename to sdc/hiframes/boxing.py index 883a9c665..3a4701160 100644 --- a/hpat/hiframes/boxing.py +++ b/sdc/hiframes/boxing.py @@ -41,17 +41,17 @@ from numba.targets.boxing import _NumbaTypeHelper from numba.targets import listobj -import hpat -from hpat.hiframes.pd_dataframe_ext import DataFrameType -from hpat.hiframes.pd_timestamp_ext import (datetime_date_type, +import sdc +from sdc.hiframes.pd_dataframe_ext import DataFrameType +from sdc.hiframes.pd_timestamp_ext import (datetime_date_type, unbox_datetime_date_array, box_datetime_date_array) -from hpat.str_ext import string_type, list_string_array_type -from hpat.str_arr_ext import (string_array_type, unbox_str_series, box_str_arr) -from hpat.hiframes.pd_categorical_ext import (PDCategoricalDtype, +from sdc.str_ext import string_type, list_string_array_type +from sdc.str_arr_ext import (string_array_type, unbox_str_series, box_str_arr) +from sdc.hiframes.pd_categorical_ext import (PDCategoricalDtype, box_categorical_array, unbox_categorical_array) -from hpat.hiframes.pd_series_ext import (SeriesType, arr_to_series_type, +from sdc.hiframes.pd_series_ext import (SeriesType, arr_to_series_type, _get_series_array_type) -from hpat.hiframes.split_impl import (string_array_split_view_type, +from sdc.hiframes.split_impl import (string_array_split_view_type, box_str_arr_split_view) from .. 
import hstr_ext diff --git a/hpat/hiframes/dataframe_pass.py b/sdc/hiframes/dataframe_pass.py similarity index 90% rename from hpat/hiframes/dataframe_pass.py rename to sdc/hiframes/dataframe_pass.py index a6d96d43b..55c3d0bd2 100644 --- a/hpat/hiframes/dataframe_pass.py +++ b/sdc/hiframes/dataframe_pass.py @@ -45,23 +45,23 @@ from numba.extending import overload from numba.typing.templates import infer_global, AbstractTemplate, signature from numba.compiler_machinery import FunctionPass, register_pass -import hpat -from hpat import hiframes -from hpat.utils import (debug_prints, inline_new_blocks, ReplaceFunc, +import sdc +from sdc import hiframes +from sdc.utils import (debug_prints, inline_new_blocks, ReplaceFunc, is_whole_slice, is_array, is_assign, sanitize_varname, update_globals) -from hpat.str_ext import string_type -from hpat.str_arr_ext import (string_array_type, StringArrayType, +from sdc.str_ext import string_type +from sdc.str_arr_ext import (string_array_type, StringArrayType, is_str_arr_typ, pre_alloc_string_array) -from hpat.io.pio_api import h5dataset_type -from hpat.hiframes.rolling import get_rolling_setup_args -from hpat.hiframes.pd_dataframe_ext import (DataFrameType, DataFrameLocType, +from sdc.io.pio_api import h5dataset_type +from sdc.hiframes.rolling import get_rolling_setup_args +from sdc.hiframes.pd_dataframe_ext import (DataFrameType, DataFrameLocType, DataFrameILocType, DataFrameIatType) -from hpat.hiframes.pd_series_ext import SeriesType, is_series_type -import hpat.hiframes.pd_groupby_ext -from hpat.hiframes.pd_groupby_ext import DataFrameGroupByType -import hpat.hiframes.pd_rolling_ext -from hpat.hiframes.pd_rolling_ext import RollingType -from hpat.hiframes.aggregate import get_agg_func +from sdc.hiframes.pd_series_ext import SeriesType, is_series_type +import sdc.hiframes.pd_groupby_ext +from sdc.hiframes.pd_groupby_ext import DataFrameGroupByType +import sdc.hiframes.pd_rolling_ext +from sdc.hiframes.pd_rolling_ext import 
RollingType +from sdc.hiframes.aggregate import get_agg_func @register_pass(mutates_CFG=True, analysis_only=False) @@ -273,7 +273,7 @@ def _run_getitem(self, assign, rhs): arr = self._get_dataframe_data(df_var, index, nodes) # TODO: index return self._replace_func( - lambda A: hpat.hiframes.api.init_series(A), + lambda A: sdc.hiframes.api.init_series(A), [arr], pre_nodes=nodes) # df[['C1', 'C2']] @@ -335,11 +335,11 @@ def _run_getitem(self, assign, rhs): if guard(is_whole_slice, self.state.typemap, self.state.func_ir, col_filter_var): def func(A, ind, name): - return hpat.hiframes.api.init_series(A, None, name) + return sdc.hiframes.api.init_series(A, None, name) else: # TODO: test this case def func(A, ind, name): - return hpat.hiframes.api.init_series(A[ind], None, name) + return sdc.hiframes.api.init_series(A[ind], None, name) return self._replace_func(func, [in_arr, col_filter_var, name_var], pre_nodes=nodes) @@ -429,7 +429,7 @@ def _run_getattr(self, assign, rhs): self.state.typemap[name.name] = types.StringLiteral(col_name) nodes.append(ir.Assign(ir.Const(col_name, arr.loc), name, arr.loc)) return self._replace_func( - lambda arr, index, name: hpat.hiframes.api.init_series( + lambda arr, index, name: sdc.hiframes.api.init_series( arr, index, name), [arr, index, name], pre_nodes=nodes) # df.shape @@ -491,7 +491,7 @@ def _run_binop(self, assign, rhs): # self.state.typemap[out_data.name] = self.state.calltypes[rhs].return_type # nodes.append(ir.Assign(rhs, out_data, rhs.loc)) # return self._replace_func( - # lambda data: hpat.hiframes.api.init_series(data, None, None), + # lambda data: sdc.hiframes.api.init_series(data, None, None), # [out_data], # pre_nodes=nodes # ) @@ -510,7 +510,7 @@ def _run_unary(self, assign, rhs): # self.state.typemap[out_data.name] = self.state.calltypes[rhs].return_type # nodes.append(ir.Assign(rhs, out_data, rhs.loc)) # return self._replace_func( - # lambda data: hpat.hiframes.api.init_series(data), + # lambda data: 
sdc.hiframes.api.init_series(data), # [out_data], # pre_nodes=nodes # ) @@ -536,14 +536,14 @@ def _run_call(self, assign, lhs, rhs): if fdef == ('len', 'builtins') and self._is_df_var(rhs.args[0]): return self._run_call_len(lhs, rhs.args[0]) - if fdef == ('set_df_col', 'hpat.hiframes.api'): + if fdef == ('set_df_col', 'sdc.hiframes.api'): return self._run_call_set_df_column(assign, lhs, rhs) if fdef == ('merge', 'pandas'): arg_typs = tuple(self.state.typemap[v.name] for v in rhs.args) kw_typs = {name: self.state.typemap[v.name] for name, v in dict(rhs.kws).items()} - impl = hpat.hiframes.pd_dataframe_ext.merge_overload( + impl = sdc.hiframes.pd_dataframe_ext.merge_overload( *arg_typs, **kw_typs) return self._replace_func(impl, rhs.args, pysig=self.state.calltypes[rhs].pysig, kws=dict(rhs.kws)) @@ -552,12 +552,12 @@ def _run_call(self, assign, lhs, rhs): arg_typs = tuple(self.state.typemap[v.name] for v in rhs.args) kw_typs = {name: self.state.typemap[v.name] for name, v in dict(rhs.kws).items()} - impl = hpat.hiframes.pd_dataframe_ext.merge_asof_overload( + impl = sdc.hiframes.pd_dataframe_ext.merge_asof_overload( *arg_typs, **kw_typs) return self._replace_func(impl, rhs.args, pysig=self.state.calltypes[rhs].pysig, kws=dict(rhs.kws)) - if fdef == ('join_dummy', 'hpat.hiframes.api'): + if fdef == ('join_dummy', 'sdc.hiframes.api'): return self._run_call_join(assign, lhs, rhs) if (isinstance(func_mod, ir.Var) @@ -565,7 +565,7 @@ def _run_call(self, assign, lhs, rhs): return self._run_call_dataframe( assign, assign.target, rhs, func_mod, func_name) - if fdef == ('add_consts_to_type', 'hpat.hiframes.api'): + if fdef == ('add_consts_to_type', 'sdc.hiframes.api'): assign.value = rhs.args[0] return [assign] @@ -580,101 +580,101 @@ def _run_call(self, assign, lhs, rhs): return self._run_call_rolling( assign, assign.target, rhs, func_mod, func_name) - if fdef == ('pivot_table_dummy', 'hpat.hiframes.pd_groupby_ext'): + if fdef == ('pivot_table_dummy', 
'sdc.hiframes.pd_groupby_ext'): return self._run_call_pivot_table(assign, lhs, rhs) if fdef == ('crosstab', 'pandas'): arg_typs = tuple(self.state.typemap[v.name] for v in rhs.args) kw_typs = {name: self.state.typemap[v.name] for name, v in dict(rhs.kws).items()} - impl = hpat.hiframes.pd_dataframe_ext.crosstab_overload( + impl = sdc.hiframes.pd_dataframe_ext.crosstab_overload( *arg_typs, **kw_typs) return self._replace_func(impl, rhs.args, pysig=self.state.calltypes[rhs].pysig, kws=dict(rhs.kws)) - if fdef == ('crosstab_dummy', 'hpat.hiframes.pd_groupby_ext'): + if fdef == ('crosstab_dummy', 'sdc.hiframes.pd_groupby_ext'): return self._run_call_crosstab(assign, lhs, rhs) if fdef == ('concat', 'pandas'): arg_typs = tuple(self.state.typemap[v.name] for v in rhs.args) kw_typs = {name: self.state.typemap[v.name] for name, v in dict(rhs.kws).items()} - impl = hpat.hiframes.pd_dataframe_ext.concat_overload( + impl = sdc.hiframes.pd_dataframe_ext.concat_overload( *arg_typs, **kw_typs) return self._replace_func(impl, rhs.args, pysig=self.state.calltypes[rhs].pysig, kws=dict(rhs.kws)) - if (fdef == ('concat_dummy', 'hpat.hiframes.pd_dataframe_ext') + if (fdef == ('concat_dummy', 'sdc.hiframes.pd_dataframe_ext') and isinstance(self.state.typemap[lhs.name], DataFrameType)): return self._run_call_concat(assign, lhs, rhs) - if fdef == ('sort_values_dummy', 'hpat.hiframes.pd_dataframe_ext'): + if fdef == ('sort_values_dummy', 'sdc.hiframes.pd_dataframe_ext'): return self._run_call_df_sort_values(assign, lhs, rhs) - if fdef == ('itertuples_dummy', 'hpat.hiframes.pd_dataframe_ext'): + if fdef == ('itertuples_dummy', 'sdc.hiframes.pd_dataframe_ext'): return self._run_call_df_itertuples(assign, lhs, rhs) - if fdef == ('head_dummy', 'hpat.hiframes.pd_dataframe_ext'): + if fdef == ('head_dummy', 'sdc.hiframes.pd_dataframe_ext'): return self._run_call_df_head(assign, lhs, rhs) - if fdef == ('isna_dummy', 'hpat.hiframes.pd_dataframe_ext'): + if fdef == ('isna_dummy', 
'sdc.hiframes.pd_dataframe_ext'): return self._run_call_df_isna(assign, lhs, rhs) - if fdef == ('astype_dummy', 'hpat.hiframes.pd_dataframe_ext'): + if fdef == ('astype_dummy', 'sdc.hiframes.pd_dataframe_ext'): return self._run_call_df_astype(assign, lhs, rhs) - if fdef == ('fillna_dummy', 'hpat.hiframes.pd_dataframe_ext'): + if fdef == ('fillna_dummy', 'sdc.hiframes.pd_dataframe_ext'): return self._run_call_df_fillna(assign, lhs, rhs) - if fdef == ('dropna_dummy', 'hpat.hiframes.pd_dataframe_ext'): + if fdef == ('dropna_dummy', 'sdc.hiframes.pd_dataframe_ext'): return self._run_call_df_dropna(assign, lhs, rhs) - if fdef == ('reset_index_dummy', 'hpat.hiframes.pd_dataframe_ext'): + if fdef == ('reset_index_dummy', 'sdc.hiframes.pd_dataframe_ext'): return self._run_call_reset_index(assign, lhs, rhs) - if fdef == ('drop_inplace', 'hpat.hiframes.api'): + if fdef == ('drop_inplace', 'sdc.hiframes.api'): arg_typs = tuple(self.state.typemap[v.name] for v in rhs.args) kw_typs = {name: self.state.typemap[v.name] for name, v in dict(rhs.kws).items()} - impl = hpat.hiframes.api.drop_inplace_overload( + impl = sdc.hiframes.api.drop_inplace_overload( *arg_typs, **kw_typs) return self._replace_func(impl, rhs.args, pysig=self.state.calltypes[rhs].pysig, kws=dict(rhs.kws)) - if fdef == ('drop_dummy', 'hpat.hiframes.pd_dataframe_ext'): + if fdef == ('drop_dummy', 'sdc.hiframes.pd_dataframe_ext'): return self._run_call_drop(assign, lhs, rhs) - if fdef == ('isin_dummy', 'hpat.hiframes.pd_dataframe_ext'): + if fdef == ('isin_dummy', 'sdc.hiframes.pd_dataframe_ext'): return self._run_call_isin(assign, lhs, rhs) - if fdef == ('pct_change_dummy', 'hpat.hiframes.pd_dataframe_ext'): + if fdef == ('pct_change_dummy', 'sdc.hiframes.pd_dataframe_ext'): return self._run_call_pct_change(assign, lhs, rhs) - if fdef == ('mean_dummy', 'hpat.hiframes.pd_dataframe_ext'): + if fdef == ('mean_dummy', 'sdc.hiframes.pd_dataframe_ext'): return self._run_call_col_reduce(assign, lhs, rhs, 'mean') - if 
fdef == ('median_dummy', 'hpat.hiframes.pd_dataframe_ext'): + if fdef == ('median_dummy', 'sdc.hiframes.pd_dataframe_ext'): return self._run_call_col_reduce(assign, lhs, rhs, 'median') - if fdef == ('std_dummy', 'hpat.hiframes.pd_dataframe_ext'): + if fdef == ('std_dummy', 'sdc.hiframes.pd_dataframe_ext'): return self._run_call_col_reduce(assign, lhs, rhs, 'std') - if fdef == ('var_dummy', 'hpat.hiframes.pd_dataframe_ext'): + if fdef == ('var_dummy', 'sdc.hiframes.pd_dataframe_ext'): return self._run_call_col_reduce(assign, lhs, rhs, 'var') - if fdef == ('max_dummy', 'hpat.hiframes.pd_dataframe_ext'): + if fdef == ('max_dummy', 'sdc.hiframes.pd_dataframe_ext'): return self._run_call_col_reduce(assign, lhs, rhs, 'max') - if fdef == ('min_dummy', 'hpat.hiframes.pd_dataframe_ext'): + if fdef == ('min_dummy', 'sdc.hiframes.pd_dataframe_ext'): return self._run_call_col_reduce(assign, lhs, rhs, 'min') - if fdef == ('sum_dummy', 'hpat.hiframes.pd_dataframe_ext'): + if fdef == ('sum_dummy', 'sdc.hiframes.pd_dataframe_ext'): return self._run_call_col_reduce(assign, lhs, rhs, 'sum') - if fdef == ('prod_dummy', 'hpat.hiframes.pd_dataframe_ext'): + if fdef == ('prod_dummy', 'sdc.hiframes.pd_dataframe_ext'): return self._run_call_col_reduce(assign, lhs, rhs, 'prod') - if fdef == ('count_dummy', 'hpat.hiframes.pd_dataframe_ext'): + if fdef == ('count_dummy', 'sdc.hiframes.pd_dataframe_ext'): return self._run_call_col_reduce(assign, lhs, rhs, 'count') return [assign] @@ -685,7 +685,7 @@ def _run_call_dataframe(self, assign, lhs, rhs, df_var, func_name): arg_typs = tuple(self.state.typemap[v.name] for v in rhs.args) kw_typs = {name: self.state.typemap[v.name] for name, v in dict(rhs.kws).items()} - impl = hpat.hiframes.pd_dataframe_ext.merge_overload( + impl = sdc.hiframes.pd_dataframe_ext.merge_overload( *arg_typs, **kw_typs) return self._replace_func(impl, rhs.args, pysig=numba.utils.pysignature(pd.merge), @@ -696,7 +696,7 @@ def _run_call_dataframe(self, assign, lhs, rhs, 
df_var, func_name): arg_typs = tuple(self.state.typemap[v.name] for v in rhs.args) kw_typs = {name: self.state.typemap[v.name] for name, v in dict(rhs.kws).items()} - impl = hpat.hiframes.pd_dataframe_ext.pivot_table_overload( + impl = sdc.hiframes.pd_dataframe_ext.pivot_table_overload( *arg_typs, **kw_typs) stub = (lambda df, values=None, index=None, columns=None, aggfunc='mean', fill_value=None, margins=False, dropna=True, margins_name='All', @@ -710,7 +710,7 @@ def _run_call_dataframe(self, assign, lhs, rhs, df_var, func_name): arg_typs = tuple(self.state.typemap[v.name] for v in rhs.args) kw_typs = {name: self.state.typemap[v.name] for name, v in dict(rhs.kws).items()} - impl = hpat.hiframes.pd_rolling_ext.df_rolling_overload( + impl = sdc.hiframes.pd_rolling_ext.df_rolling_overload( *arg_typs, **kw_typs) stub = (lambda df, window, min_periods=None, center=False, win_type=None, on=None, axis=0, closed=None: None) @@ -732,7 +732,7 @@ def _run_call_dataframe(self, assign, lhs, rhs, df_var, func_name): arg_typs = tuple(self.state.typemap[v.name] for v in rhs.args) kw_typs = {name: self.state.typemap[v.name] for name, v in dict(rhs.kws).items()} - impl = hpat.hiframes.pd_dataframe_ext.sort_values_overload( + impl = sdc.hiframes.pd_dataframe_ext.sort_values_overload( *arg_typs, **kw_typs) stub = (lambda df, by, axis=0, ascending=True, inplace=False, kind='quicksort', na_position='last': None) @@ -745,7 +745,7 @@ def _run_call_dataframe(self, assign, lhs, rhs, df_var, func_name): arg_typs = tuple(self.state.typemap[v.name] for v in rhs.args) kw_typs = {name: self.state.typemap[v.name] for name, v in dict(rhs.kws).items()} - impl = hpat.hiframes.pd_dataframe_ext.itertuples_overload( + impl = sdc.hiframes.pd_dataframe_ext.itertuples_overload( *arg_typs, **kw_typs) stub = (lambda df, index=True, name='Pandas': None) return self._replace_func(impl, rhs.args, @@ -757,7 +757,7 @@ def _run_call_dataframe(self, assign, lhs, rhs, df_var, func_name): arg_typs = 
tuple(self.state.typemap[v.name] for v in rhs.args) kw_typs = {name: self.state.typemap[v.name] for name, v in dict(rhs.kws).items()} - impl = hpat.hiframes.pd_dataframe_ext.head_overload( + impl = sdc.hiframes.pd_dataframe_ext.head_overload( *arg_typs, **kw_typs) stub = (lambda df, n=5: None) return self._replace_func(impl, rhs.args, @@ -769,7 +769,7 @@ def _run_call_dataframe(self, assign, lhs, rhs, df_var, func_name): arg_typs = tuple(self.state.typemap[v.name] for v in rhs.args) kw_typs = {name: self.state.typemap[v.name] for name, v in dict(rhs.kws).items()} - impl = hpat.hiframes.pd_dataframe_ext.isna_overload( + impl = sdc.hiframes.pd_dataframe_ext.isna_overload( *arg_typs, **kw_typs) stub = (lambda df: None) return self._replace_func(impl, rhs.args, @@ -781,7 +781,7 @@ def _run_call_dataframe(self, assign, lhs, rhs, df_var, func_name): arg_typs = tuple(self.state.typemap[v.name] for v in rhs.args) kw_typs = {name: self.state.typemap[v.name] for name, v in dict(rhs.kws).items()} - impl = hpat.hiframes.pd_dataframe_ext.astype_overload( + impl = sdc.hiframes.pd_dataframe_ext.astype_overload( *arg_typs, **kw_typs) stub = (lambda df, dtype, copy=True, errors='raise': None) return self._replace_func(impl, rhs.args, @@ -793,7 +793,7 @@ def _run_call_dataframe(self, assign, lhs, rhs, df_var, func_name): arg_typs = tuple(self.state.typemap[v.name] for v in rhs.args) kw_typs = {name: self.state.typemap[v.name] for name, v in dict(rhs.kws).items()} - impl = hpat.hiframes.pd_dataframe_ext.fillna_overload( + impl = sdc.hiframes.pd_dataframe_ext.fillna_overload( *arg_typs, **kw_typs) stub = (lambda df, value=None, method=None, axis=None, inplace=False, limit=None, downcast=None: None) @@ -806,7 +806,7 @@ def _run_call_dataframe(self, assign, lhs, rhs, df_var, func_name): arg_typs = tuple(self.state.typemap[v.name] for v in rhs.args) kw_typs = {name: self.state.typemap[v.name] for name, v in dict(rhs.kws).items()} - impl = hpat.hiframes.pd_dataframe_ext.dropna_overload( + 
impl = sdc.hiframes.pd_dataframe_ext.dropna_overload( *arg_typs, **kw_typs) stub = (lambda df, axis=0, how='any', thresh=None, subset=None, inplace=False: None) @@ -819,7 +819,7 @@ def _run_call_dataframe(self, assign, lhs, rhs, df_var, func_name): arg_typs = tuple(self.state.typemap[v.name] for v in rhs.args) kw_typs = {name: self.state.typemap[v.name] for name, v in dict(rhs.kws).items()} - impl = hpat.hiframes.pd_dataframe_ext.reset_index_overload( + impl = sdc.hiframes.pd_dataframe_ext.reset_index_overload( *arg_typs, **kw_typs) stub = (lambda df, level=None, drop=False, inplace=False, col_level=0, col_fill='': None) @@ -832,7 +832,7 @@ def _run_call_dataframe(self, assign, lhs, rhs, df_var, func_name): arg_typs = tuple(self.state.typemap[v.name] for v in rhs.args) kw_typs = {name: self.state.typemap[v.name] for name, v in dict(rhs.kws).items()} - impl = hpat.hiframes.pd_dataframe_ext.drop_overload( + impl = sdc.hiframes.pd_dataframe_ext.drop_overload( *arg_typs, **kw_typs) stub = (lambda df, labels=None, axis=0, index=None, columns=None, level=None, inplace=False, errors='raise': None) @@ -845,7 +845,7 @@ def _run_call_dataframe(self, assign, lhs, rhs, df_var, func_name): arg_typs = tuple(self.state.typemap[v.name] for v in rhs.args) kw_typs = {name: self.state.typemap[v.name] for name, v in dict(rhs.kws).items()} - impl = hpat.hiframes.pd_dataframe_ext.isin_overload( + impl = sdc.hiframes.pd_dataframe_ext.isin_overload( *arg_typs, **kw_typs) stub = (lambda df, values: None) return self._replace_func(impl, rhs.args, @@ -857,7 +857,7 @@ def _run_call_dataframe(self, assign, lhs, rhs, df_var, func_name): arg_typs = tuple(self.state.typemap[v.name] for v in rhs.args) kw_typs = {name: self.state.typemap[v.name] for name, v in dict(rhs.kws).items()} - impl = hpat.hiframes.pd_dataframe_ext.append_overload( + impl = sdc.hiframes.pd_dataframe_ext.append_overload( *arg_typs, **kw_typs) stub = (lambda df, other, ignore_index=False, verify_integrity=False, sort=None: 
None) @@ -870,7 +870,7 @@ def _run_call_dataframe(self, assign, lhs, rhs, df_var, func_name): arg_typs = tuple(self.state.typemap[v.name] for v in rhs.args) kw_typs = {name: self.state.typemap[v.name] for name, v in dict(rhs.kws).items()} - impl = hpat.hiframes.pd_dataframe_ext.pct_change_overload( + impl = sdc.hiframes.pd_dataframe_ext.pct_change_overload( *arg_typs, **kw_typs) stub = (lambda df, periods=1, fill_method='pad', limit=None, freq=None: None) @@ -883,7 +883,7 @@ def _run_call_dataframe(self, assign, lhs, rhs, df_var, func_name): arg_typs = tuple(self.state.typemap[v.name] for v in rhs.args) kw_typs = {name: self.state.typemap[v.name] for name, v in dict(rhs.kws).items()} - impl = hpat.hiframes.pd_dataframe_ext.mean_overload( + impl = sdc.hiframes.pd_dataframe_ext.mean_overload( *arg_typs, **kw_typs) stub = (lambda df, axis=None, skipna=None, level=None, numeric_only=None: None) @@ -896,7 +896,7 @@ def _run_call_dataframe(self, assign, lhs, rhs, df_var, func_name): arg_typs = tuple(self.state.typemap[v.name] for v in rhs.args) kw_typs = {name: self.state.typemap[v.name] for name, v in dict(rhs.kws).items()} - impl = hpat.hiframes.pd_dataframe_ext.median_overload( + impl = sdc.hiframes.pd_dataframe_ext.median_overload( *arg_typs, **kw_typs) stub = (lambda df, axis=None, skipna=None, level=None, numeric_only=None: None) @@ -909,7 +909,7 @@ def _run_call_dataframe(self, assign, lhs, rhs, df_var, func_name): arg_typs = tuple(self.state.typemap[v.name] for v in rhs.args) kw_typs = {name: self.state.typemap[v.name] for name, v in dict(rhs.kws).items()} - impl = hpat.hiframes.pd_dataframe_ext.std_overload( + impl = sdc.hiframes.pd_dataframe_ext.std_overload( *arg_typs, **kw_typs) stub = (lambda df, axis=None, skipna=None, level=None, ddof=1, numeric_only=None: None) @@ -922,7 +922,7 @@ def _run_call_dataframe(self, assign, lhs, rhs, df_var, func_name): arg_typs = tuple(self.state.typemap[v.name] for v in rhs.args) kw_typs = {name: self.state.typemap[v.name] 
for name, v in dict(rhs.kws).items()} - impl = hpat.hiframes.pd_dataframe_ext.var_overload( + impl = sdc.hiframes.pd_dataframe_ext.var_overload( *arg_typs, **kw_typs) stub = (lambda df, axis=None, skipna=None, level=None, ddof=1, numeric_only=None: None) @@ -935,7 +935,7 @@ def _run_call_dataframe(self, assign, lhs, rhs, df_var, func_name): arg_typs = tuple(self.state.typemap[v.name] for v in rhs.args) kw_typs = {name: self.state.typemap[v.name] for name, v in dict(rhs.kws).items()} - impl = hpat.hiframes.pd_dataframe_ext.max_overload( + impl = sdc.hiframes.pd_dataframe_ext.max_overload( *arg_typs, **kw_typs) stub = (lambda df, axis=None, skipna=None, level=None, numeric_only=None: None) @@ -948,7 +948,7 @@ def _run_call_dataframe(self, assign, lhs, rhs, df_var, func_name): arg_typs = tuple(self.state.typemap[v.name] for v in rhs.args) kw_typs = {name: self.state.typemap[v.name] for name, v in dict(rhs.kws).items()} - impl = hpat.hiframes.pd_dataframe_ext.min_overload( + impl = sdc.hiframes.pd_dataframe_ext.min_overload( *arg_typs, **kw_typs) stub = (lambda df, axis=None, skipna=None, level=None, numeric_only=None: None) @@ -961,7 +961,7 @@ def _run_call_dataframe(self, assign, lhs, rhs, df_var, func_name): arg_typs = tuple(self.state.typemap[v.name] for v in rhs.args) kw_typs = {name: self.state.typemap[v.name] for name, v in dict(rhs.kws).items()} - impl = hpat.hiframes.pd_dataframe_ext.sum_overload( + impl = sdc.hiframes.pd_dataframe_ext.sum_overload( *arg_typs, **kw_typs) stub = (lambda df, axis=None, skipna=None, level=None, numeric_only=None, min_count=0: None) @@ -974,7 +974,7 @@ def _run_call_dataframe(self, assign, lhs, rhs, df_var, func_name): arg_typs = tuple(self.state.typemap[v.name] for v in rhs.args) kw_typs = {name: self.state.typemap[v.name] for name, v in dict(rhs.kws).items()} - impl = hpat.hiframes.pd_dataframe_ext.prod_overload( + impl = sdc.hiframes.pd_dataframe_ext.prod_overload( *arg_typs, **kw_typs) stub = (lambda df, axis=None, 
skipna=None, level=None, numeric_only=None, min_count=0: None) @@ -987,7 +987,7 @@ def _run_call_dataframe(self, assign, lhs, rhs, df_var, func_name): arg_typs = tuple(self.state.typemap[v.name] for v in rhs.args) kw_typs = {name: self.state.typemap[v.name] for name, v in dict(rhs.kws).items()} - impl = hpat.hiframes.pd_dataframe_ext.count_overload( + impl = sdc.hiframes.pd_dataframe_ext.count_overload( *arg_typs, **kw_typs) stub = (lambda df, axis=0, level=None, numeric_only=False: None) return self._replace_func(impl, rhs.args, @@ -1044,21 +1044,21 @@ def _run_call_dataframe_apply(self, assign, lhs, rhs, df_var): func_text = "def f({}):\n".format(col_name_args) # make series to enable getitem of dt64 to timestamp for example for i in range(len(used_cols)): - func_text += " c{} = hpat.hiframes.api.init_series(c{})\n".format(i, i) + func_text += " c{} = sdc.hiframes.api.init_series(c{})\n".format(i, i) func_text += " numba.parfor.init_prange()\n" func_text += " n = len(c0)\n" func_text += " S = numba.unsafe.ndarray.empty_inferred((n,))\n" func_text += " for i in numba.parfor.internal_prange(n):\n" func_text += " row = Row({})\n".format(row_args) func_text += " S[i] = map_func(row)\n" - func_text += " return hpat.hiframes.api.init_series(S)\n" + func_text += " return sdc.hiframes.api.init_series(S)\n" loc_vars = {} - exec(func_text, {'hpat': hpat, 'numba': numba, 'Row': Row}, loc_vars) + exec(func_text, {'sdc': sdc, 'numba': numba, 'Row': Row}, loc_vars) f = loc_vars['f'] f_ir = compile_to_numba_ir( - f, {'numba': numba, 'np': np, 'Row': Row, 'hpat': hpat}) + f, {'numba': numba, 'np': np, 'Row': Row, 'sdc': sdc}) # fix definitions to enable finding sentinel f_ir._definitions = build_definitions(f_ir.blocks) topo_order = find_topo_order(f_ir.blocks) @@ -1110,7 +1110,7 @@ def _handle_df_describe(self, assign, lhs, rhs, df_var): func_text = "def f({}):\n".format(', '.join(col_name_args)) # compute stat values for c in col_name_args: - func_text += " {} = 
hpat.hiframes.api.init_series({})\n".format(c, c) + func_text += " {} = sdc.hiframes.api.init_series({})\n".format(c, c) func_text += " {}_count = np.float64({}.count())\n".format(c, c) func_text += " {}_min = {}.min()\n".format(c, c) func_text += " {}_max = {}.max()\n".format(c, c) @@ -1140,7 +1140,7 @@ def _handle_df_describe(self, assign, lhs, rhs, df_var): func_text += " 'max ' + {}\n".format(max_strs) loc_vars = {} - exec(func_text, {'hpat': hpat, 'np': np}, loc_vars) + exec(func_text, {'sdc': sdc, 'np': np}, loc_vars) f = loc_vars['f'] nodes = [] @@ -1211,11 +1211,11 @@ def _run_call_df_itertuples(self, assign, lhs, rhs): name_consts = ', '.join(["'{}'".format(c) for c in df_typ.columns]) func_text = "def f({}):\n".format(col_name_args) - func_text += " return hpat.hiframes.api.get_itertuples({}, {})\n"\ + func_text += " return sdc.hiframes.api.get_itertuples({}, {})\n"\ .format(name_consts, col_name_args) loc_vars = {} - exec(func_text, {'hpat': hpat}, loc_vars) + exec(func_text, {'sdc': sdc}, loc_vars) f = loc_vars['f'] nodes = [] @@ -1234,13 +1234,13 @@ def _run_call_df_head(self, assign, lhs, rhs): func_text = "def _head_impl({}, n):\n".format(", ".join(data_args)) for d in data_args: - func_text += " {} = hpat.hiframes.api.init_series({})\n".format(d + '_S', d) - func_text += " {} = hpat.hiframes.api.get_series_data({}.head(n))\n".format(d + '_O', d + '_S') - func_text += " return hpat.hiframes.pd_dataframe_ext.init_dataframe({}, None, {})\n".format( + func_text += " {} = sdc.hiframes.api.init_series({})\n".format(d + '_S', d) + func_text += " {} = sdc.hiframes.api.get_series_data({}.head(n))\n".format(d + '_O', d + '_S') + func_text += " return sdc.hiframes.pd_dataframe_ext.init_dataframe({}, None, {})\n".format( ", ".join(d + '_O' for d in data_args), ", ".join("'{}'".format(c) for c in df_typ.columns)) loc_vars = {} - exec(func_text, {'hpat': hpat}, loc_vars) + exec(func_text, {'sdc': sdc}, loc_vars) _head_impl = loc_vars['_head_impl'] nodes = [] @@ 
-1260,13 +1260,13 @@ def _run_call_pct_change(self, assign, lhs, rhs): func_text = "def _pct_change_impl({}, n):\n".format(", ".join(data_args)) for d in data_args: - func_text += " {} = hpat.hiframes.api.init_series({})\n".format(d + '_S', d) - func_text += " {} = hpat.hiframes.api.get_series_data({}.pct_change(n))\n".format(d + '_O', d + '_S') - func_text += " return hpat.hiframes.pd_dataframe_ext.init_dataframe({}, None, {})\n".format( + func_text += " {} = sdc.hiframes.api.init_series({})\n".format(d + '_S', d) + func_text += " {} = sdc.hiframes.api.get_series_data({}.pct_change(n))\n".format(d + '_O', d + '_S') + func_text += " return sdc.hiframes.pd_dataframe_ext.init_dataframe({}, None, {})\n".format( ", ".join(d + '_O' for d in data_args), ", ".join("'{}'".format(c) for c in df_typ.columns)) loc_vars = {} - exec(func_text, {'hpat': hpat}, loc_vars) + exec(func_text, {'sdc': sdc}, loc_vars) _pct_change_impl = loc_vars['_pct_change_impl'] nodes = [] @@ -1287,15 +1287,15 @@ def _run_call_col_reduce(self, assign, lhs, rhs, func_name): func_text = "def _reduce_impl({}):\n".format(", ".join(data_args)) for d in data_args: - func_text += " {} = hpat.hiframes.api.init_series({})\n".format(d + '_S', d) + func_text += " {} = sdc.hiframes.api.init_series({})\n".format(d + '_S', d) func_text += " {} = {}.{}()\n".format(d + '_O', d + '_S', func_name) func_text += " data = np.array(({},))\n".format( ", ".join(d + '_O' for d in data_args)) - func_text += " index = hpat.str_arr_ext.StringArray(({},))\n".format( + func_text += " index = sdc.str_arr_ext.StringArray(({},))\n".format( ", ".join("'{}'".format(c) for c in df_typ.columns)) - func_text += " return hpat.hiframes.api.init_series(data, index)\n" + func_text += " return sdc.hiframes.api.init_series(data, index)\n" loc_vars = {} - exec(func_text, {'hpat': hpat, 'np': np}, loc_vars) + exec(func_text, {'sdc': sdc, 'np': np}, loc_vars) _reduce_impl = loc_vars['_reduce_impl'] nodes = [] @@ -1316,17 +1316,17 @@ def 
_run_call_df_fillna(self, assign, lhs, rhs): func_text = "def _fillna_impl({}, val):\n".format(", ".join(data_args)) for d in data_args: - func_text += " {} = hpat.hiframes.api.init_series({})\n".format(d + '_S', d) + func_text += " {} = sdc.hiframes.api.init_series({})\n".format(d + '_S', d) if not inplace: func_text += " {} = {}.fillna(val)\n".format(d + '_S', d + '_S') else: func_text += " {}.fillna(val, inplace=True)\n".format(d + '_S') - func_text += " {} = hpat.hiframes.api.get_series_data({})\n".format(d + '_O', d + '_S') - func_text += " return hpat.hiframes.pd_dataframe_ext.init_dataframe({}, None, {})\n".format( + func_text += " {} = sdc.hiframes.api.get_series_data({})\n".format(d + '_O', d + '_S') + func_text += " return sdc.hiframes.pd_dataframe_ext.init_dataframe({}, None, {})\n".format( ", ".join(d + '_O' for d in data_args), ", ".join("'{}'".format(c) for c in df_typ.columns)) loc_vars = {} - exec(func_text, {'hpat': hpat}, loc_vars) + exec(func_text, {'sdc': sdc}, loc_vars) _fillna_impl = loc_vars['_fillna_impl'] nodes = [] @@ -1352,14 +1352,14 @@ def _run_call_df_dropna(self, assign, lhs, rhs): out_names = ", ".join('out' + str(i) for i in range(n_cols)) func_text = "def _dropna_imp({}, inplace):\n".format(arg_names) - func_text += " ({},) = hpat.hiframes.api.dropna(({},), inplace)\n".format( + func_text += " ({},) = sdc.hiframes.api.dropna(({},), inplace)\n".format( out_names, arg_names) loc_vars = {} - exec(func_text, {'hpat': hpat}, loc_vars) + exec(func_text, {'sdc': sdc}, loc_vars) _dropna_imp = loc_vars['_dropna_imp'] f_block = compile_to_numba_ir(_dropna_imp, - {'hpat': hpat}, + {'sdc': sdc}, self.state.typingctx, df_typ.data + (self.state.typemap[inplace_var.name],), self.state.typemap, @@ -1397,11 +1397,11 @@ def _run_call_reset_index(self, assign, lhs, rhs): for d in data_args: if not inplace: func_text += " {} = {}.copy()\n".format(d, d) - func_text += " return hpat.hiframes.pd_dataframe_ext.init_dataframe({}, None, {})\n".format( + 
func_text += " return sdc.hiframes.pd_dataframe_ext.init_dataframe({}, None, {})\n".format( ", ".join(data_args), ", ".join("'{}'".format(c) for c in df_typ.columns)) loc_vars = {} - exec(func_text, {'hpat': hpat}, loc_vars) + exec(func_text, {'sdc': sdc}, loc_vars) _reset_index_impl = loc_vars['_reset_index_impl'] nodes = [] @@ -1442,16 +1442,16 @@ def _run_call_df_isna(self, assign, lhs, rhs): func_lines = [] func_lines.append("def _isna_impl({}):".format(", ".join(data_args))) for d in data_args: - func_lines.append(" {} = hpat.hiframes.api.init_series({})".format(d + '_S', d)) - func_lines.append(" {} = hpat.hiframes.api.get_series_data({}.isna())".format(d + '_O', d + '_S')) - func_lines.append(" return hpat.hiframes.pd_dataframe_ext.init_dataframe({}, None, {})\n".format( + func_lines.append(" {} = sdc.hiframes.api.init_series({})".format(d + '_S', d)) + func_lines.append(" {} = sdc.hiframes.api.get_series_data({}.isna())".format(d + '_O', d + '_S')) + func_lines.append(" return sdc.hiframes.pd_dataframe_ext.init_dataframe({}, None, {})\n".format( init_df_args_data, init_df_args_cols )) func_text = '\n'.join(func_lines) loc_vars = {} - exec(func_text, {'hpat': hpat}, loc_vars) + exec(func_text, {'sdc': sdc}, loc_vars) _isna_impl = loc_vars['_isna_impl'] nodes = [] @@ -1477,11 +1477,11 @@ def _run_call_df_astype(self, assign, lhs, rhs): func_lines = [] func_lines.append("def _astype_impl({}, new_dtype):\n".format(", ".join(data_args))) for d in data_args: - func_lines.append(" {} = hpat.hiframes.api.init_series({})\n".format(d + '_S', d)) + func_lines.append(" {} = sdc.hiframes.api.init_series({})\n".format(d + '_S', d)) func_lines.append( - " {} = hpat.hiframes.api.get_series_data({}.astype(new_dtype))\n".format( + " {} = sdc.hiframes.api.get_series_data({}.astype(new_dtype))\n".format( d + '_O', d + '_S')) - func_lines.append(" return hpat.hiframes.pd_dataframe_ext.init_dataframe({}, None, {})\n".format( + func_lines.append(" return 
sdc.hiframes.pd_dataframe_ext.init_dataframe({}, None, {})\n".format( init_df_args_data, init_df_args_cols )) @@ -1518,10 +1518,10 @@ def _run_call_isin(self, assign, lhs, rhs): data = [self._get_dataframe_data(df_var, c, nodes) for c in df_typ.columns] def isin_func(A, B): - return hpat.hiframes.api.df_isin(A, B) + return sdc.hiframes.api.df_isin(A, B) def isin_vals_func(A, B): - return hpat.hiframes.api.df_isin_vals(A, B) + return sdc.hiframes.api.df_isin_vals(A, B) # create array of False values used when other col not available def bool_arr_func(A): @@ -1542,7 +1542,7 @@ def bool_arr_func(A): args = false_arr_args f_block = compile_to_numba_ir( func, - {'hpat': hpat, 'np': np}, + {'sdc': sdc, 'np': np}, self.state.typingctx, tuple(self.state.typemap[v.name] for v in args), self.state.typemap, @@ -1579,8 +1579,8 @@ def _impl(target, index, val): orig_arr = arr_def.value bool_arr = arr_def.index f_block = compile_to_numba_ir( - lambda arr, bool_arr: hpat.hiframes.api.series_filter_bool(arr, bool_arr), - {'hpat': hpat}, + lambda arr, bool_arr: sdc.hiframes.api.series_filter_bool(arr, bool_arr), + {'sdc': sdc}, self.state.typingctx, (self.state.typemap[orig_arr.name], self.state.typemap[bool_arr.name]), self.state.typemap, @@ -1593,8 +1593,8 @@ def _impl(target, index, val): # set unboxed df column with reflection if df_typ.has_parent: return self._replace_func( - lambda df, cname, arr: hpat.hiframes.pd_dataframe_ext.set_df_column_with_reflect( - df, cname, hpat.hiframes.api.fix_df_array(arr)), [df_var, rhs.args[1], new_arr], pre_nodes=nodes) + lambda df, cname, arr: sdc.hiframes.pd_dataframe_ext.set_df_column_with_reflect( + df, cname, sdc.hiframes.api.fix_df_array(arr)), [df_var, rhs.args[1], new_arr], pre_nodes=nodes) n_cols = len(df_typ.columns) in_arrs = [self._get_dataframe_data(df_var, c, nodes) @@ -1615,11 +1615,11 @@ def _impl(target, index, val): # TODO: fix list, Series data func_text = "def _init_df({}):\n".format(data_args) - func_text += " {} = 
hpat.hiframes.api.fix_df_array({})\n".format(new_arr_arg, new_arr_arg) - func_text += " return hpat.hiframes.pd_dataframe_ext.init_dataframe({}, None, {})\n".format( + func_text += " {} = sdc.hiframes.api.fix_df_array({})\n".format(new_arr_arg, new_arr_arg) + func_text += " return sdc.hiframes.pd_dataframe_ext.init_dataframe({}, None, {})\n".format( data_args, col_args) loc_vars = {} - exec(func_text, {'hpat': hpat}, loc_vars) + exec(func_text, {'sdc': sdc}, loc_vars) _init_df = loc_vars['_init_df'] return self._replace_func(_init_df, in_arrs, pre_nodes=nodes) @@ -1721,7 +1721,7 @@ def _run_call_groupby(self, assign, lhs, rhs, grp_var, func_name): and grp_typ.explicit_select and grp_typ.as_index) return self._replace_func( - lambda A: hpat.hiframes.api.init_series(A), + lambda A: sdc.hiframes.api.init_series(A), list(df_col_map.values()), pre_nodes=nodes) _init_df = _gen_init_df(out_typ.columns) @@ -1800,7 +1800,7 @@ def _agg_len_impl(in_arr): # pragma: no cover # TODO: make out_key_var an index column # TODO: check Series vs. 
array for index/columns - agg_node = hpat.hiframes.aggregate.Aggregate( + agg_node = sdc.hiframes.aggregate.Aggregate( lhs.name, 'crosstab', [index.name], None, df_col_map, in_vars, [index], _agg_len_impl, None, lhs.loc, pivot_arr, pivot_values, True) @@ -1902,7 +1902,7 @@ def f(arr): and rolling_typ.explicit_select and rolling_typ.as_index) return self._replace_func( - lambda A: hpat.hiframes.api.init_series(A), + lambda A: sdc.hiframes.api.init_series(A), list(df_col_map.values()), pre_nodes=nodes) _init_df = _gen_init_df(out_typ.columns) @@ -1923,51 +1923,51 @@ def _gen_rolling_call(self, in_col_var, out_col_var, window, center, args, if on_arr is not None: if func_name == 'cov': def f(arr, other, on_arr, w, center): # pragma: no cover - df_arr = hpat.hiframes.rolling.rolling_cov( + df_arr = sdc.hiframes.rolling.rolling_cov( arr, other, on_arr, w, center) if func_name == 'corr': def f(arr, other, on_arr, w, center): # pragma: no cover - df_arr = hpat.hiframes.rolling.rolling_corr( + df_arr = sdc.hiframes.rolling.rolling_corr( arr, other, on_arr, w, center) args = [in_col_var, other, on_arr, window, center] else: if func_name == 'cov': def f(arr, other, w, center): # pragma: no cover - df_arr = hpat.hiframes.rolling.rolling_cov( + df_arr = sdc.hiframes.rolling.rolling_cov( arr, other, w, center) if func_name == 'corr': def f(arr, other, w, center): # pragma: no cover - df_arr = hpat.hiframes.rolling.rolling_corr( + df_arr = sdc.hiframes.rolling.rolling_corr( arr, other, w, center) args = [in_col_var, other, window, center] # variable window case elif on_arr is not None: if func_name == 'apply': def f(arr, on_arr, w, center, func): # pragma: no cover - df_arr = hpat.hiframes.rolling.rolling_variable( + df_arr = sdc.hiframes.rolling.rolling_variable( arr, on_arr, w, center, False, func) args = [in_col_var, on_arr, window, center, args[0]] else: def f(arr, on_arr, w, center): # pragma: no cover - df_arr = hpat.hiframes.rolling.rolling_variable( + df_arr = 
sdc.hiframes.rolling.rolling_variable( arr, on_arr, w, center, False, _func_name) args = [in_col_var, on_arr, window, center] else: # fixed window # apply case takes the passed function instead of just name if func_name == 'apply': def f(arr, w, center, func): # pragma: no cover - df_arr = hpat.hiframes.rolling.rolling_fixed( + df_arr = sdc.hiframes.rolling.rolling_fixed( arr, w, center, False, func) args = [in_col_var, window, center, args[0]] else: def f(arr, w, center): # pragma: no cover - df_arr = hpat.hiframes.rolling.rolling_fixed( + df_arr = sdc.hiframes.rolling.rolling_fixed( arr, w, center, False, _func_name) args = [in_col_var, window, center] arg_typs = tuple(self.state.typemap[v.name] for v in args) f_block = compile_to_numba_ir(f, - {'hpat': hpat, '_func_name': func_name}, + {'sdc': sdc, '_func_name': func_name}, self.state.typingctx, arg_typs, self.state.typemap, @@ -1993,10 +1993,10 @@ def gen_nan_func(A): # gen concat function arg_names = ", ".join(['in{}'.format(i) for i in range(len(df_list))]) func_text = "def _concat_imp({}):\n".format(arg_names) - func_text += " return hpat.hiframes.api.concat(({}))\n".format( + func_text += " return sdc.hiframes.api.concat(({}))\n".format( arg_names) loc_vars = {} - exec(func_text, {'hpat': hpat}, loc_vars) + exec(func_text, {'sdc': sdc}, loc_vars) _concat_imp = loc_vars['_concat_imp'] out_vars = [] @@ -2009,7 +2009,7 @@ def gen_nan_func(A): # generate full NaN column if cname not in df_typ.columns: f_block = compile_to_numba_ir(gen_nan_func, - {'hpat': hpat, 'np': np}, + {'sdc': sdc, 'np': np}, self.state.typingctx, (df_typ.data[0],), self.state.typemap, @@ -2024,7 +2024,7 @@ def gen_nan_func(A): arg_typs = tuple(self.state.typemap[v.name] for v in args) f_block = compile_to_numba_ir(_concat_imp, - {'hpat': hpat, 'np': np}, + {'sdc': sdc, 'np': np}, self.state.typingctx, arg_typs, self.state.typemap, @@ -2043,8 +2043,8 @@ def _run_call_concat_columns(self, objs, out_typ): out_vars = [] for obj in objs: 
f_block = compile_to_numba_ir( - lambda S: hpat.hiframes.api.get_series_data(S), - {'hpat': hpat}, + lambda S: sdc.hiframes.api.get_series_data(S), + {'sdc': sdc}, self.state.typingctx, (self.state.typemap[obj.name],), self.state.typemap, @@ -2094,7 +2094,7 @@ def _set_df_inplace(self, _init_df, out_arrs, df_var, loc, nodes): df_typ = self.state.typemap[df_var.name] arg_typs = tuple(self.state.typemap[v.name] for v in out_arrs) f_block = compile_to_numba_ir(_init_df, - {'hpat': hpat}, + {'sdc': sdc}, self.state.typingctx, arg_typs, self.state.typemap, @@ -2106,8 +2106,8 @@ def _set_df_inplace(self, _init_df, out_arrs, df_var, loc, nodes): if df_typ.has_parent: # XXX fix the output type using dummy call to set_parent=True f_block = compile_to_numba_ir( - lambda df: hpat.hiframes.pd_dataframe_ext.set_parent_dummy(df), - {'hpat': hpat}, + lambda df: sdc.hiframes.pd_dataframe_ext.set_parent_dummy(df), + {'sdc': sdc}, self.state.typingctx, (self.state.typemap[new_df.name],), self.state.typemap, @@ -2133,7 +2133,7 @@ def _get_dataframe_data(self, df_var, col_name, nodes): ind = df_typ.columns.index(col_name) var_def = guard(get_definition, self.state.func_ir, df_var) call_def = guard(find_callname, self.state.func_ir, var_def) - if call_def == ('init_dataframe', 'hpat.hiframes.pd_dataframe_ext'): + if call_def == ('init_dataframe', 'sdc.hiframes.pd_dataframe_ext'): return var_def.args[ind] loc = df_var.loc @@ -2143,9 +2143,9 @@ def _get_dataframe_data(self, df_var, col_name, nodes): # XXX use get_series_data() for getting data instead of S._data # to enable alias analysis f_block = compile_to_numba_ir( - lambda df, c_ind: hpat.hiframes.pd_dataframe_ext.get_dataframe_data( + lambda df, c_ind: sdc.hiframes.pd_dataframe_ext.get_dataframe_data( df, c_ind), - {'hpat': hpat}, + {'sdc': sdc}, self.state.typingctx, (df_typ, self.state.typemap[ind_var.name]), self.state.typemap, @@ -2160,14 +2160,14 @@ def _get_dataframe_index(self, df_var, nodes): n_cols = len(df_typ.columns) 
var_def = guard(get_definition, self.state.func_ir, df_var) call_def = guard(find_callname, self.state.func_ir, var_def) - if call_def == ('init_dataframe', 'hpat.hiframes.pd_dataframe_ext'): + if call_def == ('init_dataframe', 'sdc.hiframes.pd_dataframe_ext'): return var_def.args[n_cols] # XXX use get_series_data() for getting data instead of S._data # to enable alias analysis f_block = compile_to_numba_ir( - lambda df: hpat.hiframes.pd_dataframe_ext.get_dataframe_index(df), - {'hpat': hpat}, + lambda df: sdc.hiframes.pd_dataframe_ext.get_dataframe_index(df), + {'sdc': sdc}, self.state.typingctx, (df_typ,), self.state.typemap, @@ -2179,7 +2179,7 @@ def _get_dataframe_index(self, df_var, nodes): def _replace_func(self, func, args, const=False, pre_nodes=None, extra_globals=None, pysig=None, kws=None): - glbls = {'numba': numba, 'np': np, 'hpat': hpat, 'pd': pd} + glbls = {'numba': numba, 'np': np, 'sdc': sdc, 'pd': pd} if extra_globals is not None: glbls.update(extra_globals) @@ -2314,10 +2314,10 @@ def _gen_init_df(columns): data_args = ", ".join('data{}'.format(i) for i in range(n_cols)) func_text = "def _init_df({}):\n".format(data_args) - func_text += " return hpat.hiframes.pd_dataframe_ext.init_dataframe({}, None, {})\n".format( + func_text += " return sdc.hiframes.pd_dataframe_ext.init_dataframe({}, None, {})\n".format( data_args, ", ".join("'{}'".format(c) for c in columns)) loc_vars = {} - exec(func_text, {'hpat': hpat}, loc_vars) + exec(func_text, {'sdc': sdc}, loc_vars) _init_df = loc_vars['_init_df'] return _init_df diff --git a/hpat/hiframes/datetime_date_ext.py b/sdc/hiframes/datetime_date_ext.py similarity index 97% rename from hpat/hiframes/datetime_date_ext.py rename to sdc/hiframes/datetime_date_ext.py index 3f8a7d342..e0fa8f8e3 100644 --- a/hpat/hiframes/datetime_date_ext.py +++ b/sdc/hiframes/datetime_date_ext.py @@ -34,8 +34,8 @@ lower_builtin) from numba.typing import signature -import hpat -from hpat.hiframes.pd_timestamp_ext import 
(datetime_date_type, +import sdc +from sdc.hiframes.pd_timestamp_ext import (datetime_date_type, box_datetime_date_array) diff --git a/hpat/hiframes/filter.py b/sdc/hiframes/filter.py similarity index 96% rename from hpat/hiframes/filter.py rename to sdc/hiframes/filter.py index 00596d99c..3db62ff79 100644 --- a/hpat/hiframes/filter.py +++ b/sdc/hiframes/filter.py @@ -31,12 +31,12 @@ from numba import typeinfer, ir, ir_utils, config, types from numba.ir_utils import visit_vars_inner, replace_vars_inner from numba.typing import signature -import hpat -from hpat import distributed, distributed_analysis -from hpat.distributed_analysis import Distribution -from hpat.utils import debug_prints -from hpat.str_arr_ext import string_array_type -from hpat.hiframes.split_impl import string_array_split_view_type +import sdc +from sdc import distributed, distributed_analysis +from sdc.distributed_analysis import Distribution +from sdc.utils import debug_prints +from sdc.str_arr_ext import string_array_type +from sdc.hiframes.split_impl import string_array_split_view_type class Filter(ir.Stmt): @@ -65,7 +65,7 @@ def filter_array_analysis(filter_node, equiv_set, typemap, array_analysis): post = [] # empty filter nodes should be deleted in remove dead assert len(filter_node.df_in_vars) > 0, "empty filter in array analysis" - from hpat.str_ext import list_string_array_type + from sdc.str_ext import list_string_array_type # arrays of input df have same size in first dimension all_shapes = [] @@ -222,7 +222,7 @@ def visit_vars_filter(filter_node, callback, cbdata): def remove_dead_filter(filter_node, lives, arg_aliases, alias_map, func_ir, typemap): - if not hpat.hiframes.api.enable_hiframes_remove_dead: + if not sdc.hiframes.api.enable_hiframes_remove_dead: return filter_node dead_cols = [] diff --git a/hpat/hiframes/hiframes_typed.py b/sdc/hiframes/hiframes_typed.py similarity index 90% rename from hpat/hiframes/hiframes_typed.py rename to sdc/hiframes/hiframes_typed.py index 
a644c86ff..24289dadf 100644 --- a/hpat/hiframes/hiframes_typed.py +++ b/sdc/hiframes/hiframes_typed.py @@ -46,29 +46,29 @@ from numba.typing.templates import Signature, bound_function, signature, infer_global, AbstractTemplate, signature from numba.compiler_machinery import FunctionPass, register_pass -import hpat -from hpat.datatypes.hpat_pandas_stringmethods_types import StringMethodsType -from hpat.utils import (debug_prints, inline_new_blocks, ReplaceFunc, +import sdc +from sdc.datatypes.hpat_pandas_stringmethods_types import StringMethodsType +from sdc.utils import (debug_prints, inline_new_blocks, ReplaceFunc, is_whole_slice, is_array, update_globals) -from hpat.str_ext import (string_type, unicode_to_std_str, std_str_to_unicode, +from sdc.str_ext import (string_type, unicode_to_std_str, std_str_to_unicode, list_string_array_type) -from hpat.str_arr_ext import (string_array_type, StringArrayType, +from sdc.str_arr_ext import (string_array_type, StringArrayType, is_str_arr_typ, pre_alloc_string_array, get_utf8_size) -from hpat import hiframes -from hpat.hiframes import series_kernels, split_impl -from hpat.hiframes.pd_series_ext import (SeriesType, is_str_series_typ, +from sdc import hiframes +from sdc.hiframes import series_kernels, split_impl +from sdc.hiframes.pd_series_ext import (SeriesType, is_str_series_typ, series_to_array_type, is_dt64_series_typ, if_series_to_array_type, is_series_type, SeriesRollingType, SeriesIatType, explicit_binop_funcs, series_dt_methods_type) -from hpat.hiframes.pd_index_ext import DatetimeIndexType -from hpat.hiframes.rolling import get_rolling_setup_args -from hpat.hiframes.aggregate import Aggregate -from hpat.hiframes.series_kernels import series_replace_funcs -from hpat.hiframes.split_impl import (string_array_split_view_type, +from sdc.hiframes.pd_index_ext import DatetimeIndexType +from sdc.hiframes.rolling import get_rolling_setup_args +from sdc.hiframes.aggregate import Aggregate +from sdc.hiframes.series_kernels 
import series_replace_funcs +from sdc.hiframes.split_impl import (string_array_split_view_type, StringArraySplitViewType, getitem_c_arr, get_array_ctypes_ptr, get_split_view_index, get_split_view_data_ptr) -from hpat.io.pio_api import h5dataset_type +from sdc.io.pio_api import h5dataset_type _dt_index_binops = ('==', '!=', '>=', '>', '<=', '<', '-', @@ -150,7 +150,7 @@ def run_pass(self): else: if isinstance(inst, (Aggregate, hiframes.sort.Sort, hiframes.join.Join, hiframes.filter.Filter, - hpat.io.csv_ext.CsvReader)): + sdc.io.csv_ext.CsvReader)): out_nodes = self._handle_hiframes_nodes(inst) if isinstance(out_nodes, list): @@ -256,13 +256,13 @@ def _run_getitem(self, assign, rhs): def f(_in_arr, _ind): dt = _in_arr[_ind] s = np.int64(dt) - return hpat.hiframes.pd_timestamp_ext.convert_datetime64_to_timestamp(s) + return sdc.hiframes.pd_timestamp_ext.convert_datetime64_to_timestamp(s) data = self._get_series_data(in_arr, nodes) assert isinstance(self.state.typemap[ind_var.name], (types.Integer, types.IntegerLiteral)) f_block = compile_to_numba_ir(f, {'numba': numba, 'np': np, - 'hpat': hpat}, self.state.typingctx, + 'sdc': sdc}, self.state.typingctx, (self.state.typemap[data.name], types.intp), self.state.typemap, self.state.calltypes).blocks.popitem()[1] replace_arg_nodes(f_block, [data, ind_var]) @@ -282,7 +282,7 @@ def f(_in_arr, _ind): self.state.typemap[lhs.name]) nodes.append(ir.Assign(rhs, new_lhs, lhs.loc)) return self._replace_func( - lambda A: hpat.hiframes.api.init_series(A), [new_lhs], + lambda A: sdc.hiframes.api.init_series(A), [new_lhs], pre_nodes=nodes) nodes.append(assign) @@ -377,7 +377,7 @@ def _run_getattr(self, assign, rhs): return nodes if isinstance(rhs_type, DatetimeIndexType): - if rhs.attr in hpat.hiframes.pd_timestamp_ext.date_fields: + if rhs.attr in sdc.hiframes.pd_timestamp_ext.date_fields: return self._run_DatetimeIndex_field(assign, assign.target, rhs) if rhs.attr == 'date': return self._run_DatetimeIndex_date(assign, assign.target, 
rhs) @@ -387,13 +387,13 @@ def _run_getattr(self, assign, rhs): if dt_def is None: # TODO: check for errors raise ValueError("invalid series.dt") rhs.value = dt_def.value - if rhs.attr in hpat.hiframes.pd_timestamp_ext.date_fields: + if rhs.attr in sdc.hiframes.pd_timestamp_ext.date_fields: return self._run_DatetimeIndex_field(assign, assign.target, rhs) if rhs.attr == 'date': return self._run_DatetimeIndex_date(assign, assign.target, rhs) - if isinstance(rhs_type, hpat.hiframes.pd_index_ext.TimedeltaIndexType): - if rhs.attr in hpat.hiframes.pd_timestamp_ext.timedelta_fields: + if isinstance(rhs_type, sdc.hiframes.pd_index_ext.TimedeltaIndexType): + if rhs.attr in sdc.hiframes.pd_timestamp_ext.timedelta_fields: return self._run_Timedelta_field(assign, assign.target, rhs) if isinstance(rhs_type, SeriesType) and rhs.attr == 'size': @@ -442,7 +442,7 @@ def _run_binop(self, assign, rhs): self.state.typemap[out_data.name] = self.state.calltypes[rhs].return_type nodes.append(ir.Assign(rhs, out_data, rhs.loc)) return self._replace_func( - lambda data: hpat.hiframes.api.init_series(data, None, None), + lambda data: sdc.hiframes.api.init_series(data, None, None), [out_data], pre_nodes=nodes ) @@ -461,7 +461,7 @@ def _run_unary(self, assign, rhs): self.state.typemap[out_data.name] = self.state.calltypes[rhs].return_type nodes.append(ir.Assign(rhs, out_data, rhs.loc)) return self._replace_func( - lambda data: hpat.hiframes.api.init_series(data), + lambda data: sdc.hiframes.api.init_series(data), [out_data], pre_nodes=nodes ) @@ -494,23 +494,23 @@ def _run_call(self, assign, lhs, rhs): # functions which are used from Numba directly by calling from StringMethodsType # other functions (for example, 'capitalize' is not presented in Numba) goes to be replaced here - if func_name not in hpat.hiframes.pd_series_ext.str2str_methods_excluded: + if func_name not in sdc.hiframes.pd_series_ext.str2str_methods_excluded: return self._run_series_str_method(assign, assign.target, 
series_var, func_name, rhs) # replace _get_type_max_value(arr.dtype) since parfors # arr.dtype transformation produces invalid code for dt64 # TODO: min - if fdef == ('_get_type_max_value', 'hpat.hiframes.hiframes_typed'): + if fdef == ('_get_type_max_value', 'sdc.hiframes.hiframes_typed'): if self.state.typemap[rhs.args[0].name] == types.DType(types.NPDatetime('ns')): return self._replace_func( - lambda: hpat.hiframes.pd_timestamp_ext.integer_to_dt64( + lambda: sdc.hiframes.pd_timestamp_ext.integer_to_dt64( numba.targets.builtins.get_type_max_value( numba.types.int64)), []) return self._replace_func( lambda d: numba.targets.builtins.get_type_max_value( d), rhs.args) - if fdef == ('h5_read_dummy', 'hpat.io.pio_api'): + if fdef == ('h5_read_dummy', 'sdc.io.pio_api'): ndim = guard(find_const, self.state.func_ir, rhs.args[1]) dtype_str = guard(find_const, self.state.func_ir, rhs.args[2]) index_var = rhs.args[3] @@ -518,30 +518,30 @@ def _run_call(self, assign, lhs, rhs): func_text = "def _h5_read_impl(dset_id, ndim, dtype_str, index):\n" if guard(is_whole_slice, self.state.typemap, self.state.func_ir, index_var): - func_text += " size_0 = hpat.io.pio_api.h5size(dset_id, np.int32(0))\n" + func_text += " size_0 = sdc.io.pio_api.h5size(dset_id, np.int32(0))\n" else: # TODO: check index format for this case filter_read = True assert isinstance(self.state.typemap[index_var.name], types.BaseTuple) - func_text += " read_indices = hpat.io.pio_api.get_filter_read_indices(index[0])\n" + func_text += " read_indices = sdc.io.pio_api.get_filter_read_indices(index[0])\n" func_text += " size_0 = len(read_indices)\n" for i in range(1, ndim): - func_text += " size_{} = hpat.io.pio_api.h5size(dset_id, np.int32({}))\n".format(i, i) + func_text += " size_{} = sdc.io.pio_api.h5size(dset_id, np.int32({}))\n".format(i, i) func_text += " arr_shape = ({},)\n".format( ", ".join(["size_{}".format(i) for i in range(ndim)])) func_text += " zero_tup = ({},)\n".format(", ".join(["0"] * ndim)) 
func_text += " A = np.empty(arr_shape, np.{})\n".format( dtype_str) if filter_read: - func_text += " err = hpat.io.pio_api.h5read_filter(dset_id, np.int32({}),\n".format(ndim) + func_text += " err = sdc.io.pio_api.h5read_filter(dset_id, np.int32({}),\n".format(ndim) func_text += " zero_tup, arr_shape, 0, A, read_indices)\n" else: - func_text += " err = hpat.io.pio_api.h5read(dset_id, np.int32({}),\n".format(ndim) + func_text += " err = sdc.io.pio_api.h5read(dset_id, np.int32({}),\n".format(ndim) func_text += " zero_tup, arr_shape, 0, A)\n" func_text += " return A\n" loc_vars = {} - exec(func_text, {'hpat': hpat, 'np': np}, loc_vars) + exec(func_text, {'sdc': sdc, 'np': np}, loc_vars) _h5_read_impl = loc_vars['_h5_read_impl'] return self._replace_func(_h5_read_impl, rhs.args) @@ -553,15 +553,15 @@ def _run_call(self, assign, lhs, rhs): kw_typs = {name: self.state.typemap[v.name] for name, v in dict(rhs.kws).items()} - impl = hpat.hiframes.pd_series_ext.pd_series_overload( + impl = sdc.hiframes.pd_series_ext.pd_series_overload( *arg_typs, **kw_typs) return self._replace_func(impl, rhs.args, pysig=self.state.calltypes[rhs].pysig, kws=dict(rhs.kws)) - if func_mod == 'hpat.hiframes.api': + if func_mod == 'sdc.hiframes.api': return self._run_call_hiframes(assign, assign.target, rhs, func_name) - if func_mod == 'hpat.hiframes.rolling': + if func_mod == 'sdc.hiframes.rolling': return self._run_call_rolling(assign, assign.target, rhs, func_name) if fdef == ('empty_like', 'numpy'): @@ -583,7 +583,7 @@ def _run_call(self, assign, lhs, rhs): return self._run_call_dt_index( assign, assign.target, rhs, func_mod, func_name) - if (fdef == ('concat_dummy', 'hpat.hiframes.pd_dataframe_ext') + if (fdef == ('concat_dummy', 'sdc.hiframes.pd_dataframe_ext') and isinstance(self.state.typemap[lhs], SeriesType)): return self._run_call_concat(assign, lhs, rhs) @@ -591,28 +591,28 @@ def _run_call(self, assign, lhs, rhs): if fdef == ('sorted', 'builtins') and 'key' in dict(rhs.kws): return 
self._handle_sorted_by_key(rhs) - if fdef == ('init_dataframe', 'hpat.hiframes.pd_dataframe_ext'): + if fdef == ('init_dataframe', 'sdc.hiframes.pd_dataframe_ext'): return [assign] # XXX sometimes init_dataframe() can't be resolved in dataframe_pass # and there are get_dataframe_data() calls that could be optimized # example: test_sort_parallel - if fdef == ('get_dataframe_data', 'hpat.hiframes.pd_dataframe_ext'): + if fdef == ('get_dataframe_data', 'sdc.hiframes.pd_dataframe_ext'): df_var = rhs.args[0] df_typ = self.state.typemap[df_var.name] ind = guard(find_const, self.state.func_ir, rhs.args[1]) var_def = guard(get_definition, self.state.func_ir, df_var) call_def = guard(find_callname, self.state.func_ir, var_def) - if call_def == ('init_dataframe', 'hpat.hiframes.pd_dataframe_ext'): + if call_def == ('init_dataframe', 'sdc.hiframes.pd_dataframe_ext'): assign.value = var_def.args[ind] - if fdef == ('get_dataframe_index', 'hpat.hiframes.pd_dataframe_ext'): + if fdef == ('get_dataframe_index', 'sdc.hiframes.pd_dataframe_ext'): df_var = rhs.args[0] df_typ = self.state.typemap[df_var.name] n_cols = len(df_typ.columns) var_def = guard(get_definition, self.state.func_ir, df_var) call_def = guard(find_callname, self.state.func_ir, var_def) - if call_def == ('init_dataframe', 'hpat.hiframes.pd_dataframe_ext'): + if call_def == ('init_dataframe', 'sdc.hiframes.pd_dataframe_ext'): assign.value = var_def.args[n_cols] # convert Series to Array for unhandled calls @@ -629,7 +629,7 @@ def _run_call(self, assign, lhs, rhs): rhs.args = new_args # Second condition is to avoid chenging SeriesGroupBy class members - # test: python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_groupby_count + # test: python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_groupby_count if isinstance(self.state.typemap[lhs], SeriesType) and not isinstance(func_mod, ir.Var): scope = assign.target.scope new_lhs = ir.Var(scope, mk_unique_var(lhs + '_data'), rhs.loc) @@ 
-637,7 +637,7 @@ def _run_call(self, assign, lhs, rhs): nodes.append(ir.Assign(rhs, new_lhs, rhs.loc)) def _replace_func_param_impl(A): - return hpat.hiframes.api.init_series(A) + return sdc.hiframes.api.init_series(A) return self._replace_func(_replace_func_param_impl, [new_lhs], pre_nodes=nodes) nodes.append(assign) @@ -661,7 +661,7 @@ def _run_call_hiframes(self, assign, lhs, rhs, func_name): # but it can be removed sometimes var_def = guard(get_definition, self.state.func_ir, rhs.args[0]) call_def = guard(find_callname, self.state.func_ir, var_def) - if call_def == ('init_datetime_index', 'hpat.hiframes.api'): + if call_def == ('init_datetime_index', 'sdc.hiframes.api'): assign.value = var_def.args[0] return [assign] @@ -670,7 +670,7 @@ def _run_call_hiframes(self, assign, lhs, rhs, func_name): # but it can be removed sometimes var_def = guard(get_definition, self.state.func_ir, rhs.args[0]) call_def = guard(find_callname, self.state.func_ir, var_def) - if call_def == ('init_series', 'hpat.hiframes.api'): + if call_def == ('init_series', 'sdc.hiframes.api'): assign.value = var_def.args[0] return [assign] @@ -680,7 +680,7 @@ def _run_call_hiframes(self, assign, lhs, rhs, func_name): # arr = fix_df_array(col) -> arr=col if col is array if func_name == 'fix_df_array': in_typ = self.state.typemap[rhs.args[0].name] - impl = hpat.hiframes.api.fix_df_array_overload(in_typ) + impl = sdc.hiframes.api.fix_df_array_overload(in_typ) return self._replace_func(impl, rhs.args) # arr = fix_rolling_array(col) -> arr=col if col is float array @@ -693,7 +693,7 @@ def _run_call_hiframes(self, assign, lhs, rhs, func_name): def f(column): # pragma: no cover a = column.astype(np.float64) f_block = compile_to_numba_ir(f, - {'hpat': hpat, 'np': np}, self.state.typingctx, + {'sdc': sdc, 'np': np}, self.state.typingctx, (if_series_to_array_type(self.state.typemap[in_arr.name]),), self.state.typemap, self.state.calltypes).blocks.popitem()[1] replace_arg_nodes(f_block, [in_arr]) @@ -825,8 
+825,8 @@ def _flatten_impl(A): for s in elems: flat_list.append(s) - return hpat.hiframes.api.init_series( - hpat.hiframes.api.parallel_fix_df_array(flat_list)) + return sdc.hiframes.api.init_series( + sdc.hiframes.api.parallel_fix_df_array(flat_list)) return self._replace_func(_flatten_impl, [arg], pre_nodes=nodes) if func_name == 'to_numeric': @@ -841,9 +841,9 @@ def _to_numeric_impl(A): n = len(A) B = np.empty(n, out_dtype) for i in numba.parfor.internal_prange(n): - hpat.str_arr_ext.str_arr_item_to_numeric(B, i, A, i) + sdc.str_arr_ext.str_arr_item_to_numeric(B, i, A, i) - return hpat.hiframes.api.init_series(B) + return sdc.hiframes.api.init_series(B) nodes = [] data = self._get_series_data(rhs.args[0], nodes) @@ -860,7 +860,7 @@ def parse_impl(data): n = len(data) S = numba.unsafe.ndarray.empty_inferred((n,)) for i in numba.parfor.internal_prange(n): - S[i] = hpat.hiframes.pd_timestamp_ext.parse_datetime_str(data[i]) + S[i] = sdc.hiframes.pd_timestamp_ext.parse_datetime_str(data[i]) return S return self._replace_func(parse_impl, [data], pre_nodes=nodes) @@ -917,10 +917,10 @@ def _run_call_series(self, assign, lhs, rhs, series_var, func_name): data = self._get_series_data(series_var, nodes) def run_call_series_quantile(A, q): - return hpat.hiframes.api.quantile(A, q) + return sdc.hiframes.api.quantile(A, q) def run_call_series_quantile_default(A): - return hpat.hiframes.api.quantile(A, 0.5) + return sdc.hiframes.api.quantile(A, 0.5) if len(rhs.args) == 0: args = [data] @@ -944,7 +944,7 @@ def run_call_series_quantile_default(A): index = self._get_series_index(series_var, nodes) name = rhs.args[0] return self._replace_func( - lambda data, index, name: hpat.hiframes.api.init_series( + lambda data, index, name: sdc.hiframes.api.init_series( data, index, name), [data, index, name], pre_nodes=nodes) @@ -1061,7 +1061,7 @@ def run_call_series_quantile_default(A): # # TODO: handle args like sort=False # # def func(A, B): - # return hpat.hiframes.api.init_series(A, 
B).sort_values(ascending=False) + # return sdc.hiframes.api.init_series(A, B).sort_values(ascending=False) # return self._replace_func(func, [out_data_var, out_key_var], pre_nodes=nodes) # astype with string output @@ -1100,11 +1100,11 @@ def run_call_series_quantile_default(A): arg_names = ", ".join("arg{}".format(i) for i in range(n_args)) sep_comma = ", " if n_args > 0 else "" func_text = "def _func_impl(A{}{}):\n".format(sep_comma, arg_names) - func_text += (" return hpat.hiframes.api.init_series(A.{}({}))\n" + func_text += (" return sdc.hiframes.api.init_series(A.{}({}))\n" ).format(func_name, arg_names) loc_vars = {} - exec(func_text, {'hpat': hpat}, loc_vars) + exec(func_text, {'sdc': sdc}, loc_vars) _func_impl = loc_vars['_func_impl'] return self._replace_func(_func_impl, [data] + rhs.args, pre_nodes=nodes) @@ -1160,7 +1160,7 @@ def _handle_series_sort(self, lhs, rhs, series_var, is_argsort): # create output Series return self._replace_func( - lambda A, B: hpat.hiframes.api.init_series(A, B), + lambda A, B: sdc.hiframes.api.init_series(A, B), args, pre_nodes=nodes) @@ -1182,7 +1182,7 @@ def _run_call_series_fillna(self, assign, lhs, rhs, series_var): # optimization: just set null bit if fill is empty if guard(find_const, self.state.func_ir, val) == "": return self._replace_func( - lambda A: hpat.str_arr_ext.set_null_bits(A), + lambda A: sdc.str_arr_ext.set_null_bits(A), [data], pre_nodes=nodes) # Since string arrays can't be changed, we have to create a new @@ -1193,14 +1193,14 @@ def _run_call_series_fillna(self, assign, lhs, rhs, series_var): def str_fillna_impl(A, fill, name): # not using A.fillna since definition list is not working # for A to find callname - return hpat.hiframes.api.fillna_str_alloc(A, fill, name) + return sdc.hiframes.api.fillna_str_alloc(A, fill, name) # A.fillna(fill) assign.target = series_var # replace output return self._replace_func(str_fillna_impl, [data, val, name], pre_nodes=nodes) else: return self._replace_func( - lambda a, 
b, c: hpat.hiframes.api.fillna(a, b, c), + lambda a, b, c: sdc.hiframes.api.fillna(a, b, c), [data, data, val], pre_nodes=nodes) else: @@ -1230,7 +1230,7 @@ def _run_call_series_dropna(self, assign, lhs, rhs, series_var): def dropna_impl(A, name): # not using A.dropna since definition list is not working # for A to find callname - return hpat.hiframes.api.dropna(A, name) + return sdc.hiframes.api.dropna(A, name) assign.target = series_var # replace output return self._replace_func(dropna_impl, [data, name], pre_nodes=nodes) @@ -1242,7 +1242,7 @@ def dropna_impl(A, name): else: # integer case, TODO: bool, date etc. def func(A, name): - return hpat.hiframes.api.init_series(A, None, name) + return sdc.hiframes.api.init_series(A, None, name) return self._replace_func(func, [data, name], pre_nodes=nodes) def _handle_series_map(self, assign, lhs, rhs, series_var): @@ -1269,26 +1269,26 @@ def _handle_series_map(self, assign, lhs, rhs, series_var): func_text += " S = numba.unsafe.ndarray.empty_inferred((n,))\n" func_text += " for i in numba.parfor.internal_prange(n):\n" if dtype == types.NPDatetime('ns'): - func_text += " t = hpat.hiframes.pd_timestamp_ext.convert_datetime64_to_timestamp(np.int64(A[i]))\n" + func_text += " t = sdc.hiframes.pd_timestamp_ext.convert_datetime64_to_timestamp(np.int64(A[i]))\n" elif isinstance(dtype, types.BaseTuple): - func_text += " t = hpat.hiframes.api.convert_rec_to_tup(A[i])\n" + func_text += " t = sdc.hiframes.api.convert_rec_to_tup(A[i])\n" else: func_text += " t = A[i]\n" func_text += " v = map_func(t)\n" if isinstance(out_typ, types.BaseTuple): - func_text += " S[i] = hpat.hiframes.api.convert_tup_to_rec(v)\n" + func_text += " S[i] = sdc.hiframes.api.convert_tup_to_rec(v)\n" else: func_text += " S[i] = v\n" # func_text += " print(S[i])\n" - func_text += " return hpat.hiframes.api.init_series(S)\n" + func_text += " return sdc.hiframes.api.init_series(S)\n" #func_text += " return ret\n" loc_vars = {} - exec(func_text, {'hpat': hpat, 
'np': np, 'numba': numba}, loc_vars) + exec(func_text, {'sdc': sdc, 'np': np, 'numba': numba}, loc_vars) f = loc_vars['f'] _globals = self.state.func_ir.func_id.func.__globals__ - f_ir = compile_to_numba_ir(f, {'numba': numba, 'np': np, 'hpat': hpat}) + f_ir = compile_to_numba_ir(f, {'numba': numba, 'np': np, 'sdc': sdc}) # fix definitions to enable finding sentinel f_ir._definitions = build_definitions(f_ir.blocks) @@ -1338,11 +1338,11 @@ def _run_call_rolling(self, assign, lhs, rhs, func_name): if func_name == 'rolling_corr': def rolling_corr_impl(arr, other, win, center): - cov = hpat.hiframes.rolling.rolling_cov( + cov = sdc.hiframes.rolling.rolling_cov( arr, other, win, center) - a_std = hpat.hiframes.rolling.rolling_fixed( + a_std = sdc.hiframes.rolling.rolling_fixed( arr, win, center, False, 'std') - b_std = hpat.hiframes.rolling.rolling_fixed( + b_std = sdc.hiframes.rolling.rolling_fixed( other, win, center, False, 'std') return cov / (a_std * b_std) return self._replace_func( @@ -1354,13 +1354,13 @@ def rolling_cov_impl(arr, other, w, center): # pragma: no cover Y = other.astype(np.float64) XpY = X + Y XtY = X * Y - count = hpat.hiframes.rolling.rolling_fixed( + count = sdc.hiframes.rolling.rolling_fixed( XpY, w, center, False, 'count') - mean_XtY = hpat.hiframes.rolling.rolling_fixed( + mean_XtY = sdc.hiframes.rolling.rolling_fixed( XtY, w, center, False, 'mean') - mean_X = hpat.hiframes.rolling.rolling_fixed( + mean_X = sdc.hiframes.rolling.rolling_fixed( X, w, center, False, 'mean') - mean_Y = hpat.hiframes.rolling.rolling_fixed( + mean_Y = sdc.hiframes.rolling.rolling_fixed( Y, w, center, False, 'mean') bias_adj = count / (count - ddof) return (mean_XtY - mean_X * mean_Y) * bias_adj @@ -1378,9 +1378,9 @@ def rolling_cov_impl(arr, other, w, center): # pragma: no cover func_node, dtype, out_dtype) def f(arr, w, center): # pragma: no cover - df_arr = hpat.hiframes.rolling.rolling_fixed( + df_arr = sdc.hiframes.rolling.rolling_fixed( arr, w, center, False, 
_func) - f_block = compile_to_numba_ir(f, {'hpat': hpat, '_func': imp_dis}, + f_block = compile_to_numba_ir(f, {'sdc': sdc, '_func': imp_dis}, self.state.typingctx, tuple(self.state.typemap[v.name] for v in rhs.args[:-2]), self.state.typemap, self.state.calltypes).blocks.popitem()[1] @@ -1399,9 +1399,9 @@ def f(arr, w, center): # pragma: no cover func_node, dtype, out_dtype) def f(arr, on_arr, w, center): # pragma: no cover - df_arr = hpat.hiframes.rolling.rolling_variable( + df_arr = sdc.hiframes.rolling.rolling_variable( arr, on_arr, w, center, False, _func) - f_block = compile_to_numba_ir(f, {'hpat': hpat, '_func': imp_dis}, + f_block = compile_to_numba_ir(f, {'sdc': sdc, '_func': imp_dis}, self.state.typingctx, tuple(self.state.typemap[v.name] for v in rhs.args[:-2]), self.state.typemap, self.state.calltypes).blocks.popitem()[1] @@ -1474,14 +1474,14 @@ def _handle_series_combine(self, assign, lhs, rhs, series_var): func_text += " if i < n2:\n" func_text += " t2 = B[i]\n" func_text += " S[i] = map_func(t1, t2)\n" - func_text += " return hpat.hiframes.api.init_series(S)\n" + func_text += " return sdc.hiframes.api.init_series(S)\n" loc_vars = {} - exec(func_text, {'hpat': hpat, 'np': np, 'numba': numba}, loc_vars) + exec(func_text, {'sdc': sdc, 'np': np, 'numba': numba}, loc_vars) f = loc_vars['f'] _globals = self.state.func_ir.func_id.func.__globals__ - f_ir = compile_to_numba_ir(f, {'numba': numba, 'np': np, 'hpat': hpat}) + f_ir = compile_to_numba_ir(f, {'numba': numba, 'np': np, 'sdc': sdc}) # fix definitions to enable finding sentinel f_ir._definitions = build_definitions(f_ir.blocks) @@ -1550,10 +1550,10 @@ def _run_call_series_rolling(self, assign, lhs, rhs, rolling_var, func_name): other = data if func_name == 'cov': def f(a, b, w, c): - return hpat.hiframes.api.init_series(hpat.hiframes.rolling.rolling_cov(a, b, w, c)) + return sdc.hiframes.api.init_series(sdc.hiframes.rolling.rolling_cov(a, b, w, c)) if func_name == 'corr': def f(a, b, w, c): - return 
hpat.hiframes.api.init_series(hpat.hiframes.rolling.rolling_corr(a, b, w, c)) + return sdc.hiframes.api.init_series(sdc.hiframes.rolling.rolling_corr(a, b, w, c)) return self._replace_func(f, [data, other, window, center], pre_nodes=nodes) elif func_name == 'apply': @@ -1565,7 +1565,7 @@ def f(a, b, w, c): func_global = func_name def f(arr, w, center): # pragma: no cover - return hpat.hiframes.api.init_series(hpat.hiframes.rolling.rolling_fixed(arr, w, center, False, _func)) + return sdc.hiframes.api.init_series(sdc.hiframes.rolling.rolling_fixed(arr, w, center, False, _func)) args = [data, window, center] return self._replace_func(f, args, pre_nodes=nodes, extra_globals={'_func': func_global}) @@ -1589,7 +1589,7 @@ def _handle_rolling_apply_func(self, func_node, dtype, out_dtype): # XXX seq pipeline used since dist pass causes a hang m = numba.ir_utils._max_label impl_disp = numba.njit( - kernel_func, pipeline_class=hpat.compiler.SDCPipelineSeq) + kernel_func, pipeline_class=sdc.compiler.SDCPipelineSeq) # precompile to avoid REP counting conflict in testing sig = out_dtype(types.Array(dtype, 1, 'C')) impl_disp.compile(sig) @@ -1616,15 +1616,15 @@ def _run_DatetimeIndex_field(self, assign, lhs, rhs): # TODO: why doesn't empty_inferred work for t4 mortgage test? func_text += ' S = np.empty(n, np.int64)\n' func_text += ' for i in numba.parfor.internal_prange(n):\n' - func_text += ' dt64 = hpat.hiframes.pd_timestamp_ext.dt64_to_integer(dti[i])\n' - func_text += ' ts = hpat.hiframes.pd_timestamp_ext.convert_datetime64_to_timestamp(dt64)\n' + func_text += ' dt64 = sdc.hiframes.pd_timestamp_ext.dt64_to_integer(dti[i])\n' + func_text += ' ts = sdc.hiframes.pd_timestamp_ext.convert_datetime64_to_timestamp(dt64)\n' func_text += ' S[i] = ts.' 
+ field + '\n' if is_dt_index: # TODO: support Int64Index - func_text += ' return hpat.hiframes.api.init_series(S)\n' + func_text += ' return sdc.hiframes.api.init_series(S)\n' else: - func_text += ' return hpat.hiframes.api.init_series(S)\n' + func_text += ' return sdc.hiframes.api.init_series(S)\n' loc_vars = {} - exec(func_text, {'hpat': hpat, 'np': np, 'numba': numba}, loc_vars) + exec(func_text, {'sdc': sdc, 'np': np, 'numba': numba}, loc_vars) f = loc_vars['f'] return self._replace_func(f, [arr], pre_nodes=nodes) @@ -1646,17 +1646,17 @@ def _run_DatetimeIndex_date(self, assign, lhs, rhs): func_text += ' n = len(dti)\n' func_text += ' S = numba.unsafe.ndarray.empty_inferred((n,))\n' func_text += ' for i in numba.parfor.internal_prange(n):\n' - func_text += ' dt64 = hpat.hiframes.pd_timestamp_ext.dt64_to_integer(dti[i])\n' - func_text += ' ts = hpat.hiframes.pd_timestamp_ext.convert_datetime64_to_timestamp(dt64)\n' - func_text += ' S[i] = hpat.hiframes.pd_timestamp_ext.datetime_date_ctor(ts.year, ts.month, ts.day)\n' + func_text += ' dt64 = sdc.hiframes.pd_timestamp_ext.dt64_to_integer(dti[i])\n' + func_text += ' ts = sdc.hiframes.pd_timestamp_ext.convert_datetime64_to_timestamp(dt64)\n' + func_text += ' S[i] = sdc.hiframes.pd_timestamp_ext.datetime_date_ctor(ts.year, ts.month, ts.day)\n' #func_text += ' S[i] = datetime.date(ts.year, ts.month, ts.day)\n' #func_text += ' S[i] = ts.day + (ts.month << 16) + (ts.year << 32)\n' if is_dt_index: # DatetimeIndex returns Array but Series.dt returns Series - func_text += ' return hpat.hiframes.datetime_date_ext.np_arr_to_array_datetime_date(S)\n' + func_text += ' return sdc.hiframes.datetime_date_ext.np_arr_to_array_datetime_date(S)\n' else: - func_text += ' return hpat.hiframes.api.init_series(S)\n' + func_text += ' return sdc.hiframes.api.init_series(S)\n' loc_vars = {} - exec(func_text, {'hpat': hpat, 'numba': numba}, loc_vars) + exec(func_text, {'sdc': sdc, 'numba': numba}, loc_vars) f = loc_vars['f'] return 
self._replace_func(f, [arr], pre_nodes=nodes) @@ -1684,7 +1684,7 @@ def _run_Timedelta_field(self, assign, lhs, rhs): func_text += ' n = len(dti)\n' func_text += ' S = numba.unsafe.ndarray.empty_inferred((n,))\n' func_text += ' for i in numba.parfor.internal_prange(n):\n' - func_text += ' dt64 = hpat.hiframes.pd_timestamp_ext.timedelta64_to_integer(dti[i])\n' + func_text += ' dt64 = sdc.hiframes.pd_timestamp_ext.timedelta64_to_integer(dti[i])\n' if field == 'nanoseconds': func_text += ' S[i] = dt64 % 1000\n' elif field == 'microseconds': @@ -1697,7 +1697,7 @@ def _run_Timedelta_field(self, assign, lhs, rhs): assert(0) func_text += ' return S\n' loc_vars = {} - exec(func_text, {'hpat': hpat, 'numba': numba}, loc_vars) + exec(func_text, {'sdc': sdc, 'numba': numba}, loc_vars) f = loc_vars['f'] return self._replace_func(f, [arr], pre_nodes=nodes) @@ -1708,14 +1708,14 @@ def _run_pd_DatetimeIndex(self, assign, lhs, rhs): arg_typs = tuple(self.state.typemap[v.name] for v in rhs.args) kw_typs = {name: self.state.typemap[v.name] for name, v in dict(rhs.kws).items()} - impl = hpat.hiframes.pd_index_ext.pd_datetimeindex_overload( + impl = sdc.hiframes.pd_index_ext.pd_datetimeindex_overload( *arg_typs, **kw_typs) return self._replace_func(impl, rhs.args, pysig=self.state.calltypes[rhs].pysig, kws=dict(rhs.kws)) def _run_series_str_method(self, assign, lhs, series_var, func_name, rhs): - supported_methods = (hpat.hiframes.pd_series_ext.str2str_methods + supported_methods = (sdc.hiframes.pd_series_ext.str2str_methods + ['len', 'replace', 'split', 'get', 'contains']) if func_name not in supported_methods: raise NotImplementedError("Series.str.{} is not supported yet".format(func_name)) @@ -1724,7 +1724,7 @@ def _run_series_str_method(self, assign, lhs, series_var, func_name, rhs): arr = self._get_series_data(series_var, nodes) # string 2 string methods - if func_name in hpat.hiframes.pd_series_ext.str2str_methods: + if func_name in sdc.hiframes.pd_series_ext.str2str_methods: 
func_text = 'def f(str_arr):\n' func_text += ' numba.parfor.init_prange()\n' func_text += ' n = len(str_arr)\n' @@ -1735,18 +1735,18 @@ def _run_series_str_method(self, assign, lhs, series_var, func_name, rhs): func_text += ' num_chars = 0\n' func_text += ' for i in numba.parfor.internal_prange(n):\n' func_text += ' num_chars += get_utf8_size(str_arr[i].{}())\n'.format(func_name) - func_text += ' S = hpat.str_arr_ext.pre_alloc_string_array(n, num_chars)\n' + func_text += ' S = sdc.str_arr_ext.pre_alloc_string_array(n, num_chars)\n' func_text += ' for i in numba.parfor.internal_prange(n):\n' func_text += ' S[i] = str_arr[i].{}()\n'.format(func_name) - func_text += ' return hpat.hiframes.api.init_series(S)\n' + func_text += ' return sdc.hiframes.api.init_series(S)\n' loc_vars = {} # print(func_text) - exec(func_text, {'hpat': hpat, 'numba': numba}, loc_vars) + exec(func_text, {'sdc': sdc, 'numba': numba}, loc_vars) f = loc_vars['f'] return self._replace_func(f, [arr], pre_nodes=nodes, extra_globals={ - 'num_total_chars': hpat.str_arr_ext.num_total_chars, - 'get_utf8_size': hpat.str_arr_ext.get_utf8_size, + 'num_total_chars': sdc.str_arr_ext.num_total_chars, + 'get_utf8_size': sdc.str_arr_ext.get_utf8_size, }) if func_name == 'contains': @@ -1773,9 +1773,9 @@ def _run_series_str_method(self, assign, lhs, series_var, func_name, rhs): func_text += ' for i in numba.parfor.internal_prange(n):\n' func_text += ' val = str_arr[i]\n' func_text += ' S[i] = len(val)\n' - func_text += ' return hpat.hiframes.api.init_series(S)\n' + func_text += ' return sdc.hiframes.api.init_series(S)\n' loc_vars = {} - exec(func_text, {'hpat': hpat, 'np': np, 'numba': numba}, loc_vars) + exec(func_text, {'sdc': sdc, 'np': np, 'numba': numba}, loc_vars) f = loc_vars['f'] return self._replace_func(f, [arr], pre_nodes=nodes) @@ -1818,18 +1818,18 @@ def _run_series_str_split(self, assign, lhs, arr, rhs, nodes): def _str_split_impl(str_arr, sep): numba.parfor.init_prange() n = len(str_arr) - out_arr 
= hpat.str_ext.alloc_list_list_str(n) + out_arr = sdc.str_ext.alloc_list_list_str(n) for i in numba.parfor.internal_prange(n): in_str = str_arr[i] out_arr[i] = in_str.split(sep) - return hpat.hiframes.api.init_series(out_arr) + return sdc.hiframes.api.init_series(out_arr) if isinstance(sep_typ, types.StringLiteral) and len(sep_typ.literal_value) == 1: def _str_split_impl(str_arr, sep): - out_arr = hpat.hiframes.split_impl.compute_split_view( + out_arr = sdc.hiframes.split_impl.compute_split_view( str_arr, sep) - return hpat.hiframes.api.init_series(out_arr) + return sdc.hiframes.api.init_series(out_arr) return self._replace_func(_str_split_impl, [arr, sep], pre_nodes=nodes) @@ -1844,7 +1844,7 @@ def _str_get_impl(str_arr, ind): numba.parfor.init_prange() n = len(str_arr) n_total_chars = 0 - str_list = hpat.str_ext.alloc_str_list(n) + str_list = sdc.str_ext.alloc_str_list(n) for i in numba.parfor.internal_prange(n): # TODO: support NAN in_list_str = str_arr[i] @@ -1856,7 +1856,7 @@ def _str_get_impl(str_arr, ind): for i in numba.parfor.internal_prange(n): _str = str_list[i] out_arr[i] = _str - return hpat.hiframes.api.init_series(out_arr) + return sdc.hiframes.api.init_series(out_arr) if arr_typ == string_array_split_view_type: # TODO: refactor and enable distributed @@ -1872,8 +1872,8 @@ def _str_get_impl(arr, ind): for i in numba.parfor.internal_prange(n): data_start, length = get_split_view_index(arr, i, ind) ptr = get_split_view_data_ptr(arr, data_start) - hpat.str_arr_ext.setitem_str_arr_ptr(out_arr, i, ptr, length) - return hpat.hiframes.api.init_series(out_arr) + sdc.str_arr_ext.setitem_str_arr_ptr(out_arr, i, ptr, length) + return sdc.hiframes.api.init_series(out_arr) return self._replace_func(_str_get_impl, [arr, ind_var], pre_nodes=nodes, @@ -1911,14 +1911,14 @@ def _is_allowed_type(t): # TODO: this has to be more generic to support all combinations. 
if (is_dt64_series_typ(self.state.typemap[arg1.name]) - and self.state.typemap[arg2.name] == hpat.hiframes.pd_timestamp_ext.pandas_timestamp_type + and self.state.typemap[arg2.name] == sdc.hiframes.pd_timestamp_ext.pandas_timestamp_type and rhs.fn in ('-', operator.sub)): return self._replace_func( series_kernels._column_sub_impl_datetime_series_timestamp, [arg1, arg2]) if (isinstance(self.state.typemap[arg1.name], DatetimeIndexType) - and self.state.typemap[arg2.name] == hpat.hiframes.pd_timestamp_ext.pandas_timestamp_type + and self.state.typemap[arg2.name] == sdc.hiframes.pd_timestamp_ext.pandas_timestamp_type and rhs.fn in ('-', operator.sub)): nodes = [] arg1 = self._get_dt_index_data(arg1, nodes) @@ -1955,16 +1955,16 @@ def _is_allowed_type(t): func_text += ' dt_index, _str = arg2, arg1\n' comp = 'other {} dt_index[i]'.format(op_str) func_text += ' l = len(dt_index)\n' - func_text += ' other = hpat.hiframes.pd_timestamp_ext.parse_datetime_str(_str)\n' + func_text += ' other = sdc.hiframes.pd_timestamp_ext.parse_datetime_str(_str)\n' func_text += ' S = numba.unsafe.ndarray.empty_inferred((l,))\n' func_text += ' for i in numba.parfor.internal_prange(l):\n' func_text += ' S[i] = {}\n'.format(comp) if is_out_series: # TODO: test - func_text += ' return hpat.hiframes.api.init_series(S)\n' + func_text += ' return sdc.hiframes.api.init_series(S)\n' else: func_text += ' return S\n' loc_vars = {} - exec(func_text, {'hpat': hpat, 'numba': numba}, loc_vars) + exec(func_text, {'sdc': sdc, 'numba': numba}, loc_vars) f = loc_vars['f'] # print(func_text) return self._replace_func(f, [arg1, arg2]) @@ -2009,12 +2009,12 @@ def _handle_string_array_expr(self, assign, rhs): func_text += ' S[i] = {} {} {}\n'.format(arg1_access, op_str, arg2_access) if is_series: - func_text += ' return hpat.hiframes.api.init_series(S)\n' + func_text += ' return sdc.hiframes.api.init_series(S)\n' else: func_text += ' return S\n' loc_vars = {} - exec(func_text, {'hpat': hpat, 'np': np, 'numba': 
numba}, loc_vars) + exec(func_text, {'sdc': sdc, 'np': np, 'numba': numba}, loc_vars) f = loc_vars['f'] return self._replace_func(f, [arg1, arg2], pre_nodes=nodes) @@ -2064,9 +2064,9 @@ def f(_in_arr): # pragma: no cover def _handle_str_contains(self, assign, lhs, rhs, fname): if fname == 'str_contains_regex': - comp_func = 'hpat.str_ext.contains_regex' + comp_func = 'sdc.str_ext.contains_regex' elif fname == 'str_contains_noregex': - comp_func = 'hpat.str_ext.contains_noregex' + comp_func = 'sdc.str_ext.contains_noregex' else: assert False @@ -2075,9 +2075,9 @@ def _handle_str_contains(self, assign, lhs, rhs, fname): func_text += ' S = np.empty(l, dtype=np.bool_)\n' func_text += ' for i in numba.parfor.internal_prange(l):\n' func_text += ' S[i] = {}(str_arr[i], pat)\n'.format(comp_func) - func_text += ' return hpat.hiframes.api.init_series(S)\n' + func_text += ' return sdc.hiframes.api.init_series(S)\n' loc_vars = {} - exec(func_text, {'hpat': hpat, 'np': np, 'numba': numba}, loc_vars) + exec(func_text, {'sdc': sdc, 'np': np, 'numba': numba}, loc_vars) f = loc_vars['f'] return self._replace_func(f, rhs.args) @@ -2120,7 +2120,7 @@ def _handle_df_col_calls(self, assign, lhs, rhs, func_name): else: # integer case, TODO: bool, date etc. 
def func(A): - return hpat.hiframes.api.init_series(A) + return sdc.hiframes.api.init_series(A) return self._replace_func(func, rhs.args) if func_name == 'column_sum': @@ -2143,7 +2143,7 @@ def _handle_df_dropna(self, assign, lhs, rhs): str_colnames = [in_names[i] for i, t in enumerate(in_typ.types) if is_str_arr_typ(t)] list_str_colnames = [in_names[i] for i, t in enumerate(in_typ.types) if t == list_string_array_type] split_view_colnames = [in_names[i] for i, t in enumerate(in_typ.types) if t == string_array_split_view_type] - isna_calls = ['hpat.hiframes.api.isna({}, i)'.format(v) for v in in_names] + isna_calls = ['sdc.hiframes.api.isna({}, i)'.format(v) for v in in_names] func_text = "def _dropna_impl(arr_tup, inplace):\n" func_text += " ({},) = arr_tup\n".format(", ".join(in_names)) @@ -2158,9 +2158,9 @@ def _handle_df_dropna(self, assign, lhs, rhs): func_text += " num_chars_{} += len({}[i])\n".format(c, c) for v, out in zip(in_names, out_names): if v in str_colnames: - func_text += " {} = hpat.str_arr_ext.pre_alloc_string_array(new_len, num_chars_{})\n".format(out, v) + func_text += " {} = sdc.str_arr_ext.pre_alloc_string_array(new_len, num_chars_{})\n".format(out, v) elif v in list_str_colnames: - func_text += " {} = hpat.str_ext.alloc_list_list_str(new_len)\n".format(out) + func_text += " {} = sdc.str_ext.alloc_list_list_str(new_len)\n".format(out) elif v in split_view_colnames: # TODO support dropna() for split view func_text += " {} = {}\n".format(out, v) @@ -2177,7 +2177,7 @@ def _handle_df_dropna(self, assign, lhs, rhs): func_text += " return ({},)\n".format(", ".join(out_names)) loc_vars = {} - exec(func_text, {'hpat': hpat, 'np': np, 'numba': numba}, loc_vars) + exec(func_text, {'sdc': sdc, 'np': np, 'numba': numba}, loc_vars) _dropna_impl = loc_vars['_dropna_impl'] return self._replace_func(_dropna_impl, rhs.args) @@ -2190,7 +2190,7 @@ def _run_call_concat(self, assign, lhs, rhs): tup_expr = ir.Expr.build_tuple(arrs, arr_tup.loc) 
nodes.append(ir.Assign(tup_expr, arr_tup, arr_tup.loc)) return self._replace_func( - lambda arr_list: hpat.hiframes.api.init_series(hpat.hiframes.api.concat(arr_list)), + lambda arr_list: sdc.hiframes.api.init_series(sdc.hiframes.api.concat(arr_list)), [arr_tup], pre_nodes=nodes) def _handle_h5_write(self, dset, index, arr): @@ -2203,15 +2203,15 @@ def _handle_h5_write(self, dset, index, arr): func_text += " zero_tup = ({},)\n".format(", ".join(["0"] * ndim)) # TODO: remove after support arr.shape in parallel func_text += " arr_shape = ({},)\n".format(", ".join(["arr.shape[{}]".format(i) for i in range(ndim)])) - func_text += " err = hpat.io.pio_api.h5write(dset_id, np.int32({}),\n".format(ndim) + func_text += " err = sdc.io.pio_api.h5write(dset_id, np.int32({}),\n".format(ndim) func_text += " zero_tup, arr_shape, 0, arr)\n" loc_vars = {} - exec(func_text, {'hpat': hpat, 'np': np}, loc_vars) + exec(func_text, {'sdc': sdc, 'np': np}, loc_vars) _h5_write_impl = loc_vars['_h5_write_impl'] f_block = compile_to_numba_ir(_h5_write_impl, {'np': np, - 'hpat': hpat}, + 'sdc': sdc}, self.state.typingctx, (self.state.typemap[dset.name], self.state.typemap[arr.name]), @@ -2259,12 +2259,12 @@ def _get_const_tup(self, tup_var): def _get_dt_index_data(self, dt_var, nodes): var_def = guard(get_definition, self.state.func_ir, dt_var) call_def = guard(find_callname, self.state.func_ir, var_def) - if call_def == ('init_datetime_index', 'hpat.hiframes.api'): + if call_def == ('init_datetime_index', 'sdc.hiframes.api'): return var_def.args[0] f_block = compile_to_numba_ir( - lambda S: hpat.hiframes.api.get_index_data(S), - {'hpat': hpat}, + lambda S: sdc.hiframes.api.get_index_data(S), + {'sdc': sdc}, self.state.typingctx, (self.state.typemap[dt_var.name],), self.state.typemap, @@ -2282,14 +2282,14 @@ def _get_series_data(self, series_var, nodes): # and series._data is never overwritten var_def = guard(get_definition, self.state.func_ir, series_var) call_def = guard(find_callname, 
self.state.func_ir, var_def) - if call_def == ('init_series', 'hpat.hiframes.api'): + if call_def == ('init_series', 'sdc.hiframes.api'): return var_def.args[0] # XXX use get_series_data() for getting data instead of S._data # to enable alias analysis f_block = compile_to_numba_ir( - lambda S: hpat.hiframes.api.get_series_data(S), - {'hpat': hpat}, + lambda S: sdc.hiframes.api.get_series_data(S), + {'sdc': sdc}, self.state.typingctx, (self.state.typemap[series_var.name],), self.state.typemap, @@ -2304,7 +2304,7 @@ def _get_series_index(self, series_var, nodes): # and series._index is never overwritten var_def = guard(get_definition, self.state.func_ir, series_var) call_def = guard(find_callname, self.state.func_ir, var_def) - if (call_def == ('init_series', 'hpat.hiframes.api') + if (call_def == ('init_series', 'sdc.hiframes.api') and (len(var_def.args) >= 2 and not self._is_const_none(var_def.args[1]))): return var_def.args[1] @@ -2312,8 +2312,8 @@ def _get_series_index(self, series_var, nodes): # XXX use get_series_index() for getting data instead of S._index # to enable alias analysis f_block = compile_to_numba_ir( - lambda S: hpat.hiframes.api.get_series_index(S), - {'hpat': hpat}, + lambda S: sdc.hiframes.api.get_series_index(S), + {'sdc': sdc}, self.state.typingctx, (self.state.typemap[series_var.name],), self.state.typemap, @@ -2347,13 +2347,13 @@ def _gen_arange(S): # pragma: no cover def _get_series_name(self, series_var, nodes): var_def = guard(get_definition, self.state.func_ir, series_var) call_def = guard(find_callname, self.state.func_ir, var_def) - if (call_def == ('init_series', 'hpat.hiframes.api') + if (call_def == ('init_series', 'sdc.hiframes.api') and len(var_def.args) == 3): return var_def.args[2] f_block = compile_to_numba_ir( - lambda S: hpat.hiframes.api.get_series_name(S), - {'hpat': hpat}, + lambda S: sdc.hiframes.api.get_series_name(S), + {'sdc': sdc}, self.state.typingctx, (self.state.typemap[series_var.name],), self.state.typemap, @@ 
-2366,12 +2366,12 @@ def _get_series_name(self, series_var, nodes): def _get_timedelta_index_data(self, dt_var, nodes): var_def = guard(get_definition, self.state.func_ir, dt_var) call_def = guard(find_callname, self.state.func_ir, var_def) - if call_def == ('init_timedelta_index', 'hpat.hiframes.api'): + if call_def == ('init_timedelta_index', 'sdc.hiframes.api'): return var_def.args[0] f_block = compile_to_numba_ir( - lambda S: hpat.hiframes.api.get_index_data(S), - {'hpat': hpat}, + lambda S: sdc.hiframes.api.get_index_data(S), + {'sdc': sdc}, self.state.typingctx, (self.state.typemap[dt_var.name],), self.state.typemap, @@ -2383,7 +2383,7 @@ def _get_timedelta_index_data(self, dt_var, nodes): def _replace_func(self, func, args, const=False, pre_nodes=None, extra_globals=None, pysig=None, kws=None): - glbls = {'numba': numba, 'np': np, 'hpat': hpat} + glbls = {'numba': numba, 'np': np, 'sdc': sdc} if extra_globals is not None: glbls.update(extra_globals) @@ -2490,10 +2490,10 @@ def _handle_hiframes_nodes(self, inst): use_vars = list(inst.right_vars.values()) + list(inst.left_vars.values()) def_vars = list(inst.df_out_vars.values()) apply_copies_func = hiframes.join.apply_copies_join - elif isinstance(inst, hpat.io.csv_ext.CsvReader): + elif isinstance(inst, sdc.io.csv_ext.CsvReader): use_vars = [] def_vars = inst.out_vars - apply_copies_func = hpat.io.csv_ext.apply_copies_csv + apply_copies_func = sdc.io.csv_ext.apply_copies_csv else: assert isinstance(inst, hiframes.filter.Filter) use_vars = list(inst.df_in_vars.values()) @@ -2533,8 +2533,8 @@ def _convert_series_hiframes_nodes(self, inst, use_vars, def_vars, v.scope, mk_unique_var(v.name + 'data'), v.loc) self.state.typemap[data_var.name] = series_to_array_type(self.state.typemap[v.name]) f_block = compile_to_numba_ir( - lambda A: hpat.hiframes.api.init_series(A), - {'hpat': hpat}, + lambda A: sdc.hiframes.api.init_series(A), + {'sdc': sdc}, self.state.typingctx, (self.state.typemap[data_var.name],), 
self.state.typemap, diff --git a/hpat/hiframes/hiframes_untyped.py b/sdc/hiframes/hiframes_untyped.py similarity index 92% rename from hpat/hiframes/hiframes_untyped.py rename to sdc/hiframes/hiframes_untyped.py index 19ca10878..6997a3790 100644 --- a/hpat/hiframes/hiframes_untyped.py +++ b/sdc/hiframes/hiframes_untyped.py @@ -48,31 +48,31 @@ from numba.analysis import compute_cfg_from_blocks from numba.compiler_machinery import FunctionPass, register_pass -import hpat -from hpat import utils, config -import hpat.io -from hpat.io import pio, parquet_pio -from hpat.hiframes import filter, join, aggregate, sort -from hpat.utils import (get_constant, NOT_CONSTANT, debug_prints, +import sdc +from sdc import utils, config +import sdc.io +from sdc.io import pio, parquet_pio +from sdc.hiframes import filter, join, aggregate, sort +from sdc.utils import (get_constant, NOT_CONSTANT, debug_prints, inline_new_blocks, ReplaceFunc, is_call, is_assign, update_globals) -import hpat.hiframes.api -from hpat.str_ext import string_type -from hpat.str_arr_ext import string_array_type -import hpat.io -from hpat.io import csv_ext +import sdc.hiframes.api +from sdc.str_ext import string_type +from sdc.str_arr_ext import string_array_type +import sdc.io +from sdc.io import csv_ext import pandas as pd import numpy as np import math -import hpat.io -from hpat.io.parquet_pio import ParquetHandler -from hpat.hiframes.pd_timestamp_ext import (datetime_date_type, +import sdc.io +from sdc.io.parquet_pio import ParquetHandler +from sdc.hiframes.pd_timestamp_ext import (datetime_date_type, datetime_date_to_int, int_to_datetime_date) -from hpat.hiframes.pd_series_ext import SeriesType -from hpat.hiframes.pd_categorical_ext import PDCategoricalDtype, CategoricalArray -from hpat.hiframes.rolling import get_rolling_setup_args, supported_rolling_funcs -from hpat.hiframes.aggregate import get_agg_func, supported_agg_funcs -import hpat.hiframes.pd_dataframe_ext +from sdc.hiframes.pd_series_ext import 
SeriesType +from sdc.hiframes.pd_categorical_ext import PDCategoricalDtype, CategoricalArray +from sdc.hiframes.rolling import get_rolling_setup_args, supported_rolling_funcs +from sdc.hiframes.aggregate import get_agg_func, supported_agg_funcs +import sdc.hiframes.pd_dataframe_ext def remove_hiframes(rhs, lives, call_list): @@ -86,30 +86,30 @@ def remove_hiframes(rhs, lives, call_list): # used in stencil generation of rolling if call_list == ['ceil', math]: return True - if (len(call_list) == 4 and call_list[1:] == ['api', 'hiframes', hpat] and + if (len(call_list) == 4 and call_list[1:] == ['api', 'hiframes', sdc] and call_list[0] in ['fix_df_array', 'fix_rolling_array', 'concat', 'count', 'mean', 'quantile', 'var', 'str_contains_regex', 'str_contains_noregex', 'column_sum', 'nunique', 'init_series', 'init_datetime_index', 'convert_tup_to_rec', 'convert_rec_to_tup']): return True - if (len(call_list) == 4 and call_list[1:] == ['series_kernels', 'hiframes', hpat] and + if (len(call_list) == 4 and call_list[1:] == ['series_kernels', 'hiframes', sdc] and call_list[0] in ['_sum_handle_nan', '_mean_handle_nan', '_var_handle_nan']): return True - if call_list == ['dist_return', 'distributed_api', hpat]: + if call_list == ['dist_return', 'distributed_api', sdc]: return True - if call_list == ['init_dataframe', 'pd_dataframe_ext', 'hiframes', hpat]: + if call_list == ['init_dataframe', 'pd_dataframe_ext', 'hiframes', sdc]: return True - if call_list == ['get_dataframe_data', 'pd_dataframe_ext', 'hiframes', hpat]: + if call_list == ['get_dataframe_data', 'pd_dataframe_ext', 'hiframes', sdc]: return True - if call_list == ['get_dataframe_index', 'pd_dataframe_ext', 'hiframes', hpat]: + if call_list == ['get_dataframe_index', 'pd_dataframe_ext', 'hiframes', sdc]: return True - # if call_list == ['set_parent_dummy', 'pd_dataframe_ext', 'hiframes', hpat]: + # if call_list == ['set_parent_dummy', 'pd_dataframe_ext', 'hiframes', sdc]: # return True - if call_list == 
['rolling_dummy', 'pd_rolling_ext', 'hiframes', hpat]: + if call_list == ['rolling_dummy', 'pd_rolling_ext', 'hiframes', sdc]: return True - if call_list == ['agg_typer', 'api', 'hiframes', hpat]: + if call_list == ['agg_typer', 'api', 'hiframes', sdc]: return True if call_list == [list]: return True @@ -329,9 +329,9 @@ def _run_assign(self, assign, label): # a = add_consts_to_type(tmp, 'A', 'B') vals_expr = ", ".join("'{}'".format(c) if isinstance(c, str) else "{}".format(c) for c in vals) func_text = "def _build_f(a):\n" - func_text += " return hpat.hiframes.api.add_consts_to_type(a, {})\n".format(vals_expr) + func_text += " return sdc.hiframes.api.add_consts_to_type(a, {})\n".format(vals_expr) loc_vars = {} - exec(func_text, {'hpat': hpat}, loc_vars) + exec(func_text, {'sdc': sdc}, loc_vars) _build_f = loc_vars['_build_f'] target = assign.target tmp_target = ir.Var( @@ -363,7 +363,7 @@ def _run_assign(self, assign, label): self._working_body.insert(0, meta_assign) pivot_call.kws = list(pivot_call.kws) pivot_call.kws.append(('_pivot_values', meta_var)) - self.state.locals[meta_var.name] = hpat.hiframes.api.MetaType(pivot_values) + self.state.locals[meta_var.name] = sdc.hiframes.api.MetaType(pivot_values) # handle copies lhs = f if isinstance(rhs, ir.Var) and rhs.name in self.df_vars: @@ -429,7 +429,7 @@ def _run_call(self, assign, label): if fdef == ('to_numeric', 'pandas'): return self._handle_pd_to_numeric(assign, lhs, rhs) - if fdef == ('read_ros_images', 'hpat.ros'): + if fdef == ('read_ros_images', 'sdc.ros'): return self._handle_ros(assign, lhs, rhs) if isinstance(func_mod, ir.Var) and self._is_df_var(func_mod): @@ -455,10 +455,10 @@ def _run_call(self, assign, label): return self.h5_handler._handle_h5_File_call(assign, lhs, rhs) if fdef == ('fromfile', 'numpy'): - return hpat.io.np_io._handle_np_fromfile(assign, lhs, rhs) + return sdc.io.np_io._handle_np_fromfile(assign, lhs, rhs) - if fdef == ('read_xenon', 'hpat.xenon_ext'): - col_items, nodes = 
hpat.xenon_ext._handle_read(assign, lhs, rhs, self.state.func_ir) + if fdef == ('read_xenon', 'sdc.xenon_ext'): + col_items, nodes = sdc.xenon_ext._handle_read(assign, lhs, rhs, self.state.func_ir) df_nodes, col_map = self._process_df_build_map(col_items) self._create_df(lhs.name, col_map, label) nodes += df_nodes @@ -522,10 +522,10 @@ def _handle_df_isin(self, lhs, rhs, df_var, label): other_colmap = {c: other for c in df_col_map.keys()} out_df_map = {} - def isin_func(A, B): return hpat.hiframes.api.df_isin(A, B) - def isin_vals_func(A, B): return hpat.hiframes.api.df_isin_vals(A, B) + def isin_func(A, B): return sdc.hiframes.api.df_isin(A, B) + def isin_vals_func(A, B): return sdc.hiframes.api.df_isin_vals(A, B) # create array of False values used when other col not available - def bool_arr_func(A): return hpat.hiframes.api.init_series(np.zeros(len(A), np.bool_)) + def bool_arr_func(A): return sdc.hiframes.api.init_series(np.zeros(len(A), np.bool_)) # use the first array of df to get len. 
TODO: check for empty df false_arr_args = [list(df_col_map.values())[0]] @@ -540,7 +540,7 @@ def bool_arr_func(A): return hpat.hiframes.api.init_series(np.zeros(len(A), np.b else: func = bool_arr_func args = false_arr_args - f_block = compile_to_numba_ir(func, {'hpat': hpat, 'np': np}).blocks.popitem()[1] + f_block = compile_to_numba_ir(func, {'sdc': sdc, 'np': np}).blocks.popitem()[1] replace_arg_nodes(f_block, args) nodes += f_block.body[:-2] out_df_map[cname] = nodes[-1].target @@ -596,13 +596,13 @@ def _handle_df_dropna(self, lhs, rhs, df_var, label): out_names = ", ".join([mk_unique_var(cname).replace('.', '_') for cname in col_names]) func_text = "def _dropna_imp({}, inplace):\n".format(arg_names) - func_text += " ({},) = hpat.hiframes.api.dropna(({},), inplace)\n".format( + func_text += " ({},) = sdc.hiframes.api.dropna(({},), inplace)\n".format( out_names, arg_names) loc_vars = {} - exec(func_text, {'hpat': hpat}, loc_vars) + exec(func_text, {'sdc': sdc}, loc_vars) _dropna_imp = loc_vars['_dropna_imp'] - f_block = compile_to_numba_ir(_dropna_imp, {'hpat': hpat}).blocks.popitem()[1] + f_block = compile_to_numba_ir(_dropna_imp, {'sdc': sdc}).blocks.popitem()[1] replace_arg_nodes(f_block, col_vars + [inplace_var]) nodes += f_block.body[:-3] @@ -636,8 +636,8 @@ def _handle_df_drop(self, assign, lhs, rhs, df_var): # is not used in other code paths # replace func variable with drop_inplace f_block = compile_to_numba_ir( - lambda: hpat.hiframes.api.drop_inplace, - {'hpat': hpat}).blocks.popitem()[1] + lambda: sdc.hiframes.api.drop_inplace, + {'sdc': sdc}).blocks.popitem()[1] nodes = f_block.body[:-2] new_func_var = nodes[-1].target rhs.func = new_func_var @@ -738,10 +738,10 @@ def _handle_pd_DataFrame(self, assign, lhs, rhs, label): col_args = ", ".join('col{}'.format(i) for i in range(n_cols)) func_text = "def _init_df({}, index, {}):\n".format(data_args, col_args) - func_text += " return hpat.hiframes.pd_dataframe_ext.init_dataframe({}, index, {})\n".format( + 
func_text += " return sdc.hiframes.pd_dataframe_ext.init_dataframe({}, index, {})\n".format( data_args, col_args) loc_vars = {} - exec(func_text, {'hpat': hpat}, loc_vars) + exec(func_text, {'sdc': sdc}, loc_vars) _init_df = loc_vars['_init_df'] # TODO: support index var @@ -856,14 +856,14 @@ def _handle_pd_read_csv(self, assign, lhs, rhs, label): data_args = ", ".join('data{}'.format(i) for i in range(n_cols)) func_text = "def _init_df({}):\n".format(data_args) - func_text += " return hpat.hiframes.pd_dataframe_ext.init_dataframe({}, None, {})\n".format( + func_text += " return sdc.hiframes.pd_dataframe_ext.init_dataframe({}, None, {})\n".format( data_args, ", ".join("'{}'".format(c) for c in columns)) loc_vars = {} - exec(func_text, {'hpat': hpat}, loc_vars) + exec(func_text, {'sdc': sdc}, loc_vars) _init_df = loc_vars['_init_df'] f_block = compile_to_numba_ir( - _init_df, {'hpat': hpat}).blocks.popitem()[1] + _init_df, {'sdc': sdc}).blocks.popitem()[1] replace_arg_nodes(f_block, data_arrs) nodes += f_block.body[:-2] nodes[-1].target = lhs @@ -947,13 +947,13 @@ def _handle_pd_Series(self, assign, lhs, rhs): in_data = arg_def.vararg arg_def.vararg = None # avoid typing error return self._replace_func( - lambda l: hpat.hiframes.api.flatten_to_series(l), + lambda l: sdc.hiframes.api.flatten_to_series(l), [in_data] ) # pd.Series() is handled in typed pass now - # return self._replace_func(lambda arr: hpat.hiframes.api.init_series( - # hpat.hiframes.api.fix_df_array(arr)), + # return self._replace_func(lambda arr: sdc.hiframes.api.init_series( + # sdc.hiframes.api.fix_df_array(arr)), # [data]) return [assign] @@ -973,7 +973,7 @@ def _handle_pd_to_numeric(self, assign, lhs, rhs): arg = rhs.args[0] return self._replace_func( - lambda arr: hpat.hiframes.api.to_numeric(arr, dtype), + lambda arr: sdc.hiframes.api.to_numeric(arr, dtype), [arg], extra_globals={'dtype': dtype}) def _handle_pq_read_table(self, assign, lhs, rhs): @@ -994,10 +994,10 @@ def 
_gen_parquet_read(self, fname, lhs, label): data_args = ", ".join('data{}'.format(i) for i in range(n_cols)) func_text = "def _init_df({}):\n".format(data_args) - func_text += " return hpat.hiframes.pd_dataframe_ext.init_dataframe({}, None, {})\n".format( + func_text += " return sdc.hiframes.pd_dataframe_ext.init_dataframe({}, None, {})\n".format( data_args, ", ".join("'{}'".format(c) for c in columns)) loc_vars = {} - exec(func_text, {'hpat': hpat}, loc_vars) + exec(func_text, {'sdc': sdc}, loc_vars) _init_df = loc_vars['_init_df'] return self._replace_func(_init_df, data_arrs, pre_nodes=nodes) @@ -1045,10 +1045,10 @@ def gen_nan_func(A): return np.full(len(A), np.nan) # gen concat function arg_names = ", ".join(['in{}'.format(i) for i in range(len(df_list))]) func_text = "def _concat_imp({}):\n".format(arg_names) - func_text += " return hpat.hiframes.api.init_series(hpat.hiframes.api.concat(({})))\n".format( + func_text += " return sdc.hiframes.api.init_series(sdc.hiframes.api.concat(({})))\n".format( arg_names) loc_vars = {} - exec(func_text, {'hpat': hpat}, loc_vars) + exec(func_text, {'sdc': sdc}, loc_vars) _concat_imp = loc_vars['_concat_imp'] done_cols = {} @@ -1063,7 +1063,7 @@ def gen_nan_func(A): return np.full(len(A), np.nan) # use a df column just for len() len_arr = list(df_col_map.values())[0] f_block = compile_to_numba_ir(gen_nan_func, - {'hpat': hpat, 'np': np}).blocks.popitem()[1] + {'sdc': sdc, 'np': np}).blocks.popitem()[1] replace_arg_nodes(f_block, [len_arr]) nodes += f_block.body[:-2] args.append(nodes[-1].target) @@ -1071,7 +1071,7 @@ def gen_nan_func(A): return np.full(len(A), np.nan) args.append(df_col_map[cname]) f_block = compile_to_numba_ir(_concat_imp, - {'hpat': hpat, 'np': np}).blocks.popitem()[1] + {'sdc': sdc, 'np': np}).blocks.popitem()[1] replace_arg_nodes(f_block, args) nodes += f_block.body[:-2] done_cols[cname] = nodes[-1].target @@ -1082,14 +1082,14 @@ def gen_nan_func(A): return np.full(len(A), np.nan) def 
_handle_concat_series(self, lhs, rhs): # defer to typed pass since the type might be non-numerical def f(arr_list): # pragma: no cover - return hpat.hiframes.api.init_series(hpat.hiframes.api.concat(arr_list)) + return sdc.hiframes.api.init_series(sdc.hiframes.api.concat(arr_list)) return self._replace_func(f, rhs.args) def _handle_ros(self, assign, lhs, rhs): if len(rhs.args) != 1: # pragma: no cover raise ValueError("Invalid read_ros_images() arguments") - import hpat.ros - return hpat.ros._handle_read_images(lhs, rhs) + import sdc.ros + return sdc.ros._handle_read_images(lhs, rhs) def _fix_df_arrays(self, items_list): nodes = [] @@ -1102,9 +1102,9 @@ def _fix_df_arrays(self, items_list): col_arr = self._fix_df_list_of_array(col_arr) def f(arr): # pragma: no cover - df_arr = hpat.hiframes.api.fix_df_array(arr) + df_arr = sdc.hiframes.api.fix_df_array(arr) f_block = compile_to_numba_ir( - f, {'hpat': hpat}).blocks.popitem()[1] + f, {'sdc': sdc}).blocks.popitem()[1] replace_arg_nodes(f_block, [col_arr]) nodes += f_block.body[:-3] # remove none return new_col_arr = nodes[-1].target @@ -1132,9 +1132,9 @@ def _process_df_build_map(self, items_list): # cast to series type def f(arr): # pragma: no cover - df_arr = hpat.hiframes.api.init_series(arr) + df_arr = sdc.hiframes.api.init_series(arr) f_block = compile_to_numba_ir( - f, {'hpat': hpat}).blocks.popitem()[1] + f, {'sdc': sdc}).blocks.popitem()[1] replace_arg_nodes(f_block, [item[1]]) nodes += f_block.body[:-3] # remove none return new_col_arr = nodes[-1].target @@ -1159,8 +1159,8 @@ def _get_func_output_typ(self, col_var, func, wrapper_func, label): dummy_ir.blocks[0].body = all_body _globals = self.state.func_ir.func_id.func.__globals__ - _globals.update({'hpat': hpat, 'numba': numba, 'np': np}) - f_ir = compile_to_numba_ir(wrapper_func, {'hpat': hpat}) + _globals.update({'sdc': sdc, 'numba': numba, 'np': np}) + f_ir = compile_to_numba_ir(wrapper_func, {'sdc': sdc}) # fix definitions to enable finding sentinel 
f_ir._definitions = build_definitions(f_ir.blocks) first_label = min(f_ir.blocks) @@ -1235,9 +1235,9 @@ def _handle_df_pivot_table(self, lhs, rhs, df_var, label): nodes = [ir.Assign(ir.Global("agg_gb", agg_func_dis, lhs.loc), agg_gb_var, lhs.loc)] def to_arr(a, _agg_f): - b = hpat.hiframes.api.to_arr_from_series(a) - res = hpat.hiframes.api.init_series(hpat.hiframes.api.agg_typer(b, _agg_f)) - f_block = compile_to_numba_ir(to_arr, {'hpat': hpat, 'np': np}).blocks.popitem()[1] + b = sdc.hiframes.api.to_arr_from_series(a) + res = sdc.hiframes.api.init_series(sdc.hiframes.api.agg_typer(b, _agg_f)) + f_block = compile_to_numba_ir(to_arr, {'sdc': sdc, 'np': np}).blocks.popitem()[1] replace_arg_nodes(f_block, [in_vars[values_arg], agg_gb_var]) nodes += f_block.body[:-3] # remove none return out_types = {values_arg: nodes[-1].target} @@ -1308,8 +1308,8 @@ def _handle_crosstab(self, lhs, rhs, label): # output of crosstab is array[int64] def to_arr(): - res = hpat.hiframes.api.init_series(np.empty(1, np.int64)) - f_block = compile_to_numba_ir(to_arr, {'hpat': hpat, 'np': np}).blocks.popitem()[1] + res = sdc.hiframes.api.init_series(np.empty(1, np.int64)) + f_block = compile_to_numba_ir(to_arr, {'sdc': sdc, 'np': np}).blocks.popitem()[1] nodes = f_block.body[:-3] # remove none return out_tp_var = nodes[-1].target out_types = {'__dummy__': out_tp_var} @@ -1394,8 +1394,8 @@ def _handle_agg_func(self, in_vars, out_colnames, func_name, lhs, rhs): agg_func = get_agg_func(self.state.func_ir, func_name, rhs) out_tp_vars = {} - # hpat.jit() instead of numba.njit() to handle str arrs etc - agg_func_dis = hpat.jit(agg_func) + # sdc.jit() instead of numba.njit() to handle str arrs etc + agg_func_dis = sdc.jit(agg_func) #agg_func_dis = numba.njit(agg_func) agg_gb_var = ir.Var(lhs.scope, mk_unique_var("agg_gb"), lhs.loc) nodes = [ir.Assign(ir.Global("agg_gb", agg_func_dis, lhs.loc), agg_gb_var, lhs.loc)] @@ -1403,9 +1403,9 @@ def _handle_agg_func(self, in_vars, out_colnames, func_name, 
lhs, rhs): in_var = in_vars[out_cname] def to_arr(a, _agg_f): - b = hpat.hiframes.api.to_arr_from_series(a) - res = hpat.hiframes.api.init_series(hpat.hiframes.api.agg_typer(b, _agg_f)) - f_block = compile_to_numba_ir(to_arr, {'hpat': hpat, 'np': np}).blocks.popitem()[1] + b = sdc.hiframes.api.to_arr_from_series(a) + res = sdc.hiframes.api.init_series(sdc.hiframes.api.agg_typer(b, _agg_f)) + f_block = compile_to_numba_ir(to_arr, {'sdc': sdc, 'np': np}).blocks.popitem()[1] replace_arg_nodes(f_block, [in_var, agg_gb_var]) nodes += f_block.body[:-3] # remove none return out_tp_vars[out_cname] = nodes[-1].target @@ -1442,7 +1442,7 @@ def _get_str_or_list(self, by_arg, list_only=False, default=None, err_msg=None, if by_arg_def is None: # try add_consts_to_type by_arg_call = guard(get_definition, self.state.func_ir, by_arg) - if guard(find_callname, self.state.func_ir, by_arg_call) == ('add_consts_to_type', 'hpat.hiframes.api'): + if guard(find_callname, self.state.func_ir, by_arg_call) == ('add_consts_to_type', 'sdc.hiframes.api'): by_arg_def = guard(find_build_sequence, self.state.func_ir, by_arg_call.args[0]) if by_arg_def is None: @@ -1601,56 +1601,56 @@ def _gen_rolling_call(self, in_col_var, out_col_var, window, center, args, func_ if on_arr is not None: if func_name == 'cov': def f(arr, other, on_arr, w, center): # pragma: no cover - df_arr = hpat.hiframes.api.init_series( - hpat.hiframes.rolling.rolling_cov( + df_arr = sdc.hiframes.api.init_series( + sdc.hiframes.rolling.rolling_cov( arr, other, on_arr, w, center)) if func_name == 'corr': def f(arr, other, on_arr, w, center): # pragma: no cover - df_arr = hpat.hiframes.api.init_series( - hpat.hiframes.rolling.rolling_corr( + df_arr = sdc.hiframes.api.init_series( + sdc.hiframes.rolling.rolling_corr( arr, other, on_arr, w, center)) args = [in_col_var, other, on_arr, window, center] else: if func_name == 'cov': def f(arr, other, w, center): # pragma: no cover - df_arr = hpat.hiframes.api.init_series( - 
hpat.hiframes.rolling.rolling_cov( + df_arr = sdc.hiframes.api.init_series( + sdc.hiframes.rolling.rolling_cov( arr, other, w, center)) if func_name == 'corr': def f(arr, other, w, center): # pragma: no cover - df_arr = hpat.hiframes.api.init_series( - hpat.hiframes.rolling.rolling_corr( + df_arr = sdc.hiframes.api.init_series( + sdc.hiframes.rolling.rolling_corr( arr, other, w, center)) args = [in_col_var, other, window, center] # variable window case elif on_arr is not None: if func_name == 'apply': def f(arr, on_arr, w, center, func): # pragma: no cover - df_arr = hpat.hiframes.api.init_series( - hpat.hiframes.rolling.rolling_variable( + df_arr = sdc.hiframes.api.init_series( + sdc.hiframes.rolling.rolling_variable( arr, on_arr, w, center, False, func)) args = [in_col_var, on_arr, window, center, args[0]] else: def f(arr, on_arr, w, center): # pragma: no cover - df_arr = hpat.hiframes.api.init_series( - hpat.hiframes.rolling.rolling_variable( + df_arr = sdc.hiframes.api.init_series( + sdc.hiframes.rolling.rolling_variable( arr, on_arr, w, center, False, _func_name)) args = [in_col_var, on_arr, window, center] else: # fixed window # apply case takes the passed function instead of just name if func_name == 'apply': def f(arr, w, center, func): # pragma: no cover - df_arr = hpat.hiframes.api.init_series( - hpat.hiframes.rolling.rolling_fixed( + df_arr = sdc.hiframes.api.init_series( + sdc.hiframes.rolling.rolling_fixed( arr, w, center, False, func)) args = [in_col_var, window, center, args[0]] else: def f(arr, w, center): # pragma: no cover - df_arr = hpat.hiframes.api.init_series( - hpat.hiframes.rolling.rolling_fixed( + df_arr = sdc.hiframes.api.init_series( + sdc.hiframes.rolling.rolling_fixed( arr, w, center, False, _func_name)) args = [in_col_var, window, center] - f_block = compile_to_numba_ir(f, {'hpat': hpat, '_func_name': func_name}).blocks.popitem()[1] + f_block = compile_to_numba_ir(f, {'sdc': sdc, '_func_name': func_name}).blocks.popitem()[1] 
replace_arg_nodes(f_block, args) nodes += f_block.body[:-3] # remove none return nodes[-1].target = out_col_var @@ -1662,8 +1662,8 @@ def _fix_rolling_array(self, col_var, func): """ # TODO: check all possible funcs def f(arr): # pragma: no cover - df_arr = hpat.hiframes.api.fix_rolling_array(arr) - f_block = compile_to_numba_ir(f, {'hpat': hpat}).blocks.popitem()[1] + df_arr = sdc.hiframes.api.fix_rolling_array(arr) + f_block = compile_to_numba_ir(f, {'sdc': sdc}).blocks.popitem()[1] replace_arg_nodes(f_block, [col_var]) nodes = f_block.body[:-3] # remove none return new_col_var = nodes[-1].target @@ -1774,14 +1774,14 @@ def _run_return(self, ret_node): def _gen_replace_dist_return(self, var, flag): if flag == 'distributed': def f(_dist_arr): # pragma: no cover - _d_arr = hpat.distributed_api.dist_return(_dist_arr) + _d_arr = sdc.distributed_api.dist_return(_dist_arr) elif flag == 'threaded': def f(_threaded_arr): # pragma: no cover - _th_arr = hpat.distributed_api.threaded_return(_threaded_arr) + _th_arr = sdc.distributed_api.threaded_return(_threaded_arr) else: raise ValueError("Invalid return flag {}".format(flag)) f_block = compile_to_numba_ir( - f, {'hpat': hpat}).blocks.popitem()[1] + f, {'sdc': sdc}).blocks.popitem()[1] replace_arg_nodes(f_block, [var]) return f_block.body[:-3] # remove none return @@ -1823,8 +1823,8 @@ def _run_df_set_column(self, inst, label, cfg): cname_var = ir.Var(inst.value.scope, mk_unique_var("$cname_const"), inst.loc) nodes = [ir.Assign(ir.Const(inst.index, inst.loc), cname_var, inst.loc)] - def func(df, cname, arr): return hpat.hiframes.api.set_df_col(df, cname, arr) - f_block = compile_to_numba_ir(func, {'hpat': hpat}).blocks.popitem()[1] + def func(df, cname, arr): return sdc.hiframes.api.set_df_col(df, cname, arr) + f_block = compile_to_numba_ir(func, {'sdc': sdc}).blocks.popitem()[1] replace_arg_nodes(f_block, [df_var, cname_var, inst.value]) nodes += f_block.body[:-2] @@ -1836,7 +1836,7 @@ def func(df, cname, arr): return 
hpat.hiframes.api.set_df_col(df, cname, arr) def _replace_func(self, func, args, const=False, array_typ_convert=True, pre_nodes=None, extra_globals=None): - glbls = {'numba': numba, 'np': np, 'hpat': hpat} + glbls = {'numba': numba, 'np': np, 'sdc': sdc} if extra_globals is not None: glbls.update(extra_globals) return ReplaceFunc(func, None, args, glbls, pre_nodes) diff --git a/hpat/hiframes/join.py b/sdc/hiframes/join.py similarity index 95% rename from hpat/hiframes/join.py rename to sdc/hiframes/join.py index 3dcf56778..17b1d5028 100644 --- a/hpat/hiframes/join.py +++ b/sdc/hiframes/join.py @@ -27,8 +27,8 @@ from .. import hdist from .. import chiframes -from hpat import config as hpat_config -from hpat.utils import _numba_to_c_type_map +from sdc import config as hpat_config +from sdc.utils import _numba_to_c_type_map from numba.targets.arrayobj import make_array import llvmlite.binding as ll from llvmlite import ir as lir @@ -45,21 +45,21 @@ from numba.ir_utils import (visit_vars_inner, replace_vars_inner, compile_to_numba_ir, replace_arg_nodes, mk_unique_var) -import hpat -from hpat import distributed, distributed_analysis -from hpat.utils import debug_prints, alloc_arr_tup, empty_like_type -from hpat.distributed_analysis import Distribution +import sdc +from sdc import distributed, distributed_analysis +from sdc.utils import debug_prints, alloc_arr_tup, empty_like_type +from sdc.distributed_analysis import Distribution -from hpat.str_arr_ext import (string_array_type, to_string_list, +from sdc.str_arr_ext import (string_array_type, to_string_list, cp_str_list_to_array, str_list_to_array, get_offset_ptr, get_data_ptr, convert_len_arr_to_offset, pre_alloc_string_array, num_total_chars, getitem_str_offset, copy_str_arr_slice, str_copy_ptr, get_utf8_size, setitem_str_offset, str_arr_set_na) -from hpat.str_ext import string_type -from hpat.timsort import copyElement_tup, getitem_arr_tup, setitem_arr_tup -from hpat.shuffle_utils import ( +from sdc.str_ext import 
string_type +from sdc.timsort import copyElement_tup, getitem_arr_tup, setitem_arr_tup +from sdc.shuffle_utils import ( getitem_arr_tup_single, val_to_tup, alltoallv, @@ -69,7 +69,7 @@ alloc_pre_shuffle_metadata, _get_keys_tup, _get_data_tup) -from hpat.hiframes.pd_categorical_ext import CategoricalArray +from sdc.hiframes.pd_categorical_ext import CategoricalArray class Join(ir.Stmt): @@ -234,7 +234,7 @@ def visit_vars_join(join_node, callback, cbdata): def remove_dead_join(join_node, lives, arg_aliases, alias_map, func_ir, typemap): - if not hpat.hiframes.api.enable_hiframes_remove_dead: + if not sdc.hiframes.api.enable_hiframes_remove_dead: return join_node # if an output column is dead, the related input column is not needed # anymore in the join @@ -390,8 +390,8 @@ def join_distributed_run(join_node, array_dists, typemap, calltypes, typingctx, if method == 'sort' and join_node.how != 'asof': # asof key is already sorted, TODO: add error checking # local sort - func_text += " hpat.hiframes.sort.local_sort(t1_keys, data_left)\n" - func_text += " hpat.hiframes.sort.local_sort(t2_keys, data_right)\n" + func_text += " sdc.hiframes.sort.local_sort(t1_keys, data_left)\n" + func_text += " sdc.hiframes.sort.local_sort(t2_keys, data_right)\n" # align output variables for local merge # add keys first (TODO: remove dead keys) @@ -414,18 +414,18 @@ def join_distributed_run(join_node, array_dists, typemap, calltypes, typingctx, if join_node.how == 'asof': func_text += (" out_t1_keys, out_t2_keys, out_data_left, out_data_right" - " = hpat.hiframes.join.local_merge_asof(t1_keys, t2_keys, data_left, data_right)\n") + " = sdc.hiframes.join.local_merge_asof(t1_keys, t2_keys, data_left, data_right)\n") elif method == 'sort': func_text += ( " out_t1_keys, out_t2_keys, out_data_left, out_data_right" - " = hpat.hiframes.join.local_merge_new(t1_keys, t2_keys, data_left, data_right, {}, {})\n".format( + " = sdc.hiframes.join.local_merge_new(t1_keys, t2_keys, data_left, data_right, {}, 
{})\n".format( join_node.how in ( 'left', 'outer'), join_node.how == 'outer')) else: assert method == 'hash' func_text += ( " out_t1_keys, out_t2_keys, out_data_left, out_data_right" - " = hpat.hiframes.join.local_hash_join(t1_keys, t2_keys, data_left, data_right, {}, {})\n".format( + " = sdc.hiframes.join.local_hash_join(t1_keys, t2_keys, data_left, data_right, {}, {})\n".format( join_node.how in ( 'left', 'outer'), join_node.how == 'outer')) @@ -453,13 +453,13 @@ def join_distributed_run(join_node, array_dists, typemap, calltypes, typingctx, func_text += " {} = right_{}\n".format(out_names[i + 2 * n_keys + len(left_other_names)], i) loc_vars = {} - exec(func_text, {'hpat': hpat}, loc_vars) + exec(func_text, {'sdc': sdc}, loc_vars) join_impl = loc_vars['f'] # print(func_text) glbs = { - 'hpat': hpat, + 'sdc': sdc, 'np': np, 'to_string_list': to_string_list, 'cp_str_list_to_array': cp_str_list_to_array, @@ -502,7 +502,7 @@ def _get_table_parallel_flags(join_node, array_dists): # @numba.njit def parallel_join_impl(key_arrs, data): # alloc shuffle meta - n_pes = hpat.distributed_api.get_size() + n_pes = sdc.distributed_api.get_size() pre_shuffle_meta = alloc_pre_shuffle_metadata(key_arrs, data, n_pes, False) # calc send/recv counts @@ -538,13 +538,13 @@ def parallel_join(key_arrs, data): def parallel_asof_comm(left_key_arrs, right_key_arrs, right_data): # align the left and right intervals # allgather the boundaries of all left intervals and calculate overlap - # rank = hpat.distributed_api.get_rank() - n_pes = hpat.distributed_api.get_size() + # rank = sdc.distributed_api.get_rank() + n_pes = sdc.distributed_api.get_size() # TODO: multiple keys bnd_starts = np.empty(n_pes, left_key_arrs[0].dtype) bnd_ends = np.empty(n_pes, left_key_arrs[0].dtype) - hpat.distributed_api.allgather(bnd_starts, left_key_arrs[0][0]) - hpat.distributed_api.allgather(bnd_ends, left_key_arrs[0][-1]) + sdc.distributed_api.allgather(bnd_starts, left_key_arrs[0][0]) + 
sdc.distributed_api.allgather(bnd_ends, left_key_arrs[0][-1]) send_counts = np.zeros(n_pes, np.int32) send_disp = np.zeros(n_pes, np.int32) @@ -573,15 +573,15 @@ def parallel_asof_comm(left_key_arrs, right_key_arrs, right_data): send_disp[i] = len(right_key_arrs[0]) - 1 i += 1 - hpat.distributed_api.alltoall(send_counts, recv_counts, 1) + sdc.distributed_api.alltoall(send_counts, recv_counts, 1) n_total_recv = recv_counts.sum() out_r_keys = np.empty(n_total_recv, right_key_arrs[0].dtype) # TODO: support string out_r_data = alloc_arr_tup(n_total_recv, right_data) - recv_disp = hpat.hiframes.join.calc_disp(recv_counts) - hpat.distributed_api.alltoallv(right_key_arrs[0], out_r_keys, send_counts, + recv_disp = sdc.hiframes.join.calc_disp(recv_counts) + sdc.distributed_api.alltoallv(right_key_arrs[0], out_r_keys, send_counts, recv_counts, send_disp, recv_disp) - hpat.distributed_api.alltoallv_tup(right_data, out_r_data, send_counts, recv_counts, send_disp, recv_disp) + sdc.distributed_api.alltoallv_tup(right_data, out_r_data, send_counts, recv_counts, send_disp, recv_disp) return (out_r_keys,), out_r_data @@ -721,7 +721,7 @@ def ensure_capacity(arr, new_size): curr_len = len(arr) if curr_len < new_size: new_len = 2 * curr_len - new_arr = hpat.hiframes.pd_categorical_ext.fix_cat_array_type( + new_arr = sdc.hiframes.pd_categorical_ext.fix_cat_array_type( np.empty(new_len, arr.dtype)) new_arr[:curr_len] = arr return new_arr @@ -840,7 +840,7 @@ def copy_elem_buff_tup_overload(data, ind, val): def trim_arr(arr, size): # pragma: no cover - return hpat.hiframes.pd_categorical_ext.fix_cat_array_type(arr[:size]) + return sdc.hiframes.pd_categorical_ext.fix_cat_array_type(arr[:size]) @overload(trim_arr) @@ -927,18 +927,18 @@ def local_hash_join_impl(left_keys, right_keys, data_left, data_right, is_left=F r_matched = np.full(r_len, False, np.bool_) out_ind = 0 - m = hpat.dict_ext.multimap_int64_init() + m = sdc.dict_ext.multimap_int64_init() for i in range(r_len): # store hash if 
keys are tuple or non-int k = _hash_if_tup(getitem_arr_tup(right_keys, i)) - hpat.dict_ext.multimap_int64_insert(m, k, i) + sdc.dict_ext.multimap_int64_insert(m, k, i) - r = hpat.dict_ext.multimap_int64_equal_range_alloc() + r = sdc.dict_ext.multimap_int64_equal_range_alloc() for i in range(l_len): l_key = getitem_arr_tup(left_keys, i) l_data_val = getitem_arr_tup(data_left, i) k = _hash_if_tup(l_key) - hpat.dict_ext.multimap_int64_equal_range_inplace(m, k, r) + sdc.dict_ext.multimap_int64_equal_range_inplace(m, k, r) num_matched = 0 for j in r: # if hash for stored, check left key against the actual right key @@ -959,7 +959,7 @@ def local_hash_join_impl(left_keys, right_keys, data_left, data_right, is_left=F out_data_right = setnan_elem_buff_tup(out_data_right, out_ind) out_ind += 1 - hpat.dict_ext.multimap_int64_equal_range_dealloc(r) + sdc.dict_ext.multimap_int64_equal_range_dealloc(r) # produce NA rows for unmatched right keys if is_right: @@ -1158,7 +1158,7 @@ def b_set(arr, ind): if isinstance(arr, CategoricalArray): def setitem_arr_nan_cat(arr, ind): - int_arr = hpat.hiframes.pd_categorical_ext.cat_array_to_int(arr) + int_arr = sdc.hiframes.pd_categorical_ext.cat_array_to_int(arr) int_arr[ind] = -1 return setitem_arr_nan_cat diff --git a/hpat/hiframes/pd_categorical_ext.py b/sdc/hiframes/pd_categorical_ext.py similarity index 100% rename from hpat/hiframes/pd_categorical_ext.py rename to sdc/hiframes/pd_categorical_ext.py diff --git a/hpat/hiframes/pd_dataframe_ext.py b/sdc/hiframes/pd_dataframe_ext.py similarity index 95% rename from hpat/hiframes/pd_dataframe_ext.py rename to sdc/hiframes/pd_dataframe_ext.py index 24c60e640..61e988095 100644 --- a/hpat/hiframes/pd_dataframe_ext.py +++ b/sdc/hiframes/pd_dataframe_ext.py @@ -39,10 +39,10 @@ AttributeTemplate, bound_function) from numba.targets.imputils import impl_ret_new_ref, impl_ret_borrowed -import hpat -from hpat.hiframes.pd_series_ext import SeriesType -from hpat.str_ext import string_type -from 
hpat.str_arr_ext import string_array_type +import sdc +from sdc.hiframes.pd_series_ext import SeriesType +from sdc.str_ext import string_type +from sdc.str_arr_ext import string_array_type class DataFrameType(types.Type): # TODO: IterableType over column names @@ -256,7 +256,7 @@ def get_dataframe_data(df, i): def _impl(df, i): if has_parent(df) and df._unboxed[i] == 0: # TODO: make df refcounted to avoid repeated unboxing - df = hpat.hiframes.boxing.unbox_dataframe_column(df, i) + df = sdc.hiframes.boxing.unbox_dataframe_column(df, i) return df._data[i] return _impl @@ -390,7 +390,7 @@ def codegen(context, builder, signature, args): # call boxing for array data # TODO: check complex data types possible for Series for dataframes set column here c = numba.pythonapi._BoxContext(context, builder, pyapi, env_manager) - py_arr = hpat.hiframes.boxing._box_series_data(arr.dtype, arr, arr_arg, c) + py_arr = sdc.hiframes.boxing._box_series_data(arr.dtype, arr, arr_arg, c) # get column as string obj cstr = context.insert_const_string(builder.module, col_name) @@ -425,7 +425,7 @@ def df_len_overload(df): def df_getitem_overload(df, ind): if isinstance(df, DataFrameType) and isinstance(ind, types.StringLiteral): index = df.columns.index(ind.literal_value) - return lambda df, ind: hpat.hiframes.api.init_series(df._data[index]) + return lambda df, ind: sdc.hiframes.api.init_series(df._data[index]) @infer_global(operator.getitem) @@ -642,7 +642,7 @@ def _impl(left, right, how='inner', on=None, left_on=None, if not onHasNoneType: left_on = right_on = on - return hpat.hiframes.api.join_dummy(left, right, left_on, right_on, how) + return sdc.hiframes.api.join_dummy(left, right, left_on, right_on, how) return _impl @@ -664,7 +664,7 @@ def _impl(left, right, on=None, left_on=None, right_on=None, if not onHasNoneType: left_on = right_on = on - return hpat.hiframes.api.join_dummy(left, right, left_on, right_on, 'asof') + return sdc.hiframes.api.join_dummy(left, right, left_on, right_on, 
'asof') return _impl @@ -678,7 +678,7 @@ def _impl(df, values=None, index=None, columns=None, aggfunc='mean', fill_value=None, margins=False, dropna=True, margins_name='All', _pivot_values=None): - return hpat.hiframes.pd_groupby_ext.pivot_table_dummy( + return sdc.hiframes.pd_groupby_ext.pivot_table_dummy( df, values, index, columns, aggfunc, _pivot_values) return _impl @@ -693,7 +693,7 @@ def crosstab_overload(index, columns, values=None, rownames=None, colnames=None, def _impl(index, columns, values=None, rownames=None, colnames=None, aggfunc=None, margins=False, margins_name='All', dropna=True, normalize=False, _pivot_values=None): - return hpat.hiframes.pd_groupby_ext.crosstab_dummy( + return sdc.hiframes.pd_groupby_ext.crosstab_dummy( index, columns, _pivot_values) return _impl @@ -706,7 +706,7 @@ def concat_overload(objs, axis=0, join='outer', join_axes=None, return (lambda objs, axis=0, join='outer', join_axes=None, ignore_index=False, keys=None, levels=None, names=None, verify_integrity=False, sort=None, copy=True: - hpat.hiframes.pd_dataframe_ext.concat_dummy(objs, axis)) + sdc.hiframes.pd_dataframe_ext.concat_dummy(objs, axis)) def concat_dummy(objs): @@ -777,8 +777,8 @@ def generic(self, args, kws): # TODO: fix NA column additions for other types if len(arr_args) < len(objs.types): arr_args.append(types.Array(types.float64, 1, 'C')) - # use hpat.hiframes.api.concat() typer - concat_typ = hpat.hiframes.api.ConcatType( + # use sdc.hiframes.api.concat() typer + concat_typ = sdc.hiframes.api.ConcatType( self.context).generic((types.Tuple(arr_args),), {}) all_data.append(concat_typ.return_type) @@ -789,7 +789,7 @@ def generic(self, args, kws): elif isinstance(objs.types[0], SeriesType): assert all(isinstance(t, SeriesType) for t in objs.types) arr_args = [S.data for S in objs.types] - concat_typ = hpat.hiframes.api.ConcatType( + concat_typ = sdc.hiframes.api.ConcatType( self.context).generic((types.Tuple(arr_args),), {}) ret_typ = 
SeriesType(concat_typ.return_type.dtype) return signature(ret_typ, *args) @@ -812,7 +812,7 @@ def sort_values_overload(df, by, axis=0, ascending=True, inplace=False, def _impl(df, by, axis=0, ascending=True, inplace=False, kind='quicksort', na_position='last'): - return hpat.hiframes.pd_dataframe_ext.sort_values_dummy( + return sdc.hiframes.pd_dataframe_ext.sort_values_dummy( df, by, ascending, inplace) return _impl @@ -829,7 +829,7 @@ def generic(self, args, kws): df, by, ascending, inplace = args # inplace value - if isinstance(inplace, hpat.utils.BooleanLiteral): + if isinstance(inplace, sdc.utils.BooleanLiteral): inplace = inplace.literal_value else: # XXX inplace type is just bool when value not passed. Therefore, @@ -881,7 +881,7 @@ def lower_set_parent_dummy(context, builder, sig, args): def itertuples_overload(df, index=True, name='Pandas'): def _impl(df, index=True, name='Pandas'): - return hpat.hiframes.pd_dataframe_ext.itertuples_dummy(df) + return sdc.hiframes.pd_dataframe_ext.itertuples_dummy(df) return _impl @@ -899,7 +899,7 @@ def generic(self, args, kws): assert "Index" not in df.columns columns = ('Index',) + df.columns arr_types = (types.Array(types.int64, 1, 'C'),) + df.data - iter_typ = hpat.hiframes.api.DataFrameTupleIterator(columns, arr_types) + iter_typ = sdc.hiframes.api.DataFrameTupleIterator(columns, arr_types) return signature(iter_typ, *args) @@ -915,7 +915,7 @@ def head_overload(df, n=5): # TODO: avoid dummy and generate func here when inlining is possible def _impl(df, n=5): - return hpat.hiframes.pd_dataframe_ext.head_dummy(df, n) + return sdc.hiframes.pd_dataframe_ext.head_dummy(df, n) return _impl @@ -949,7 +949,7 @@ def fillna_overload(df, value=None, method=None, axis=None, inplace=False, # TODO: inplace of df with parent that has a string column (reflection) def _impl(df, value=None, method=None, axis=None, inplace=False, limit=None, downcast=None): - return hpat.hiframes.pd_dataframe_ext.fillna_dummy(df, value, inplace) + 
return sdc.hiframes.pd_dataframe_ext.fillna_dummy(df, value, inplace) return _impl @@ -963,7 +963,7 @@ class FillnaDummyTyper(AbstractTemplate): def generic(self, args, kws): df, value, inplace = args # inplace value - if isinstance(inplace, hpat.utils.BooleanLiteral): + if isinstance(inplace, sdc.utils.BooleanLiteral): inplace = inplace.literal_value else: # XXX inplace type is just bool when value not passed. Therefore, @@ -993,7 +993,7 @@ def reset_index_overload(df, level=None, drop=False, inplace=False, # TODO: inplace of df with parent (reflection) def _impl(df, level=None, drop=False, inplace=False, col_level=0, col_fill=''): - return hpat.hiframes.pd_dataframe_ext.reset_index_dummy(df, inplace) + return sdc.hiframes.pd_dataframe_ext.reset_index_dummy(df, inplace) return _impl @@ -1007,7 +1007,7 @@ class ResetIndexDummyTyper(AbstractTemplate): def generic(self, args, kws): df, inplace = args # inplace value - if isinstance(inplace, hpat.utils.BooleanLiteral): + if isinstance(inplace, sdc.utils.BooleanLiteral): inplace = inplace.literal_value else: # XXX inplace type is just bool when value not passed. Therefore, @@ -1035,7 +1035,7 @@ def dropna_overload(df, axis=0, how='any', thresh=None, subset=None, # TODO: avoid dummy and generate func here when inlining is possible # TODO: inplace of df with parent (reflection) def _impl(df, axis=0, how='any', thresh=None, subset=None, inplace=False): - return hpat.hiframes.pd_dataframe_ext.dropna_dummy(df, inplace) + return sdc.hiframes.pd_dataframe_ext.dropna_dummy(df, inplace) return _impl @@ -1049,7 +1049,7 @@ class DropnaDummyTyper(AbstractTemplate): def generic(self, args, kws): df, inplace = args # inplace value - if isinstance(inplace, hpat.utils.BooleanLiteral): + if isinstance(inplace, sdc.utils.BooleanLiteral): inplace = inplace.literal_value else: # XXX inplace type is just bool when value not passed. 
Therefore, @@ -1079,7 +1079,7 @@ def drop_overload(df, labels=None, axis=0, index=None, columns=None, # TODO: inplace of df with parent (reflection) def _impl(df, labels=None, axis=0, index=None, columns=None, level=None, inplace=False, errors='raise'): - return hpat.hiframes.pd_dataframe_ext.drop_dummy( + return sdc.hiframes.pd_dataframe_ext.drop_dummy( df, labels, axis, columns, inplace) return _impl @@ -1119,7 +1119,7 @@ def generic(self, args, kws): new_data = tuple(df.data[df.columns.index(c)] for c in new_cols) # inplace value - if isinstance(inplace, hpat.utils.BooleanLiteral): + if isinstance(inplace, sdc.utils.BooleanLiteral): inplace = inplace.literal_value else: # XXX inplace type is just bool when value not passed. Therefore, @@ -1147,7 +1147,7 @@ def lower_drop_dummy(context, builder, sig, args): def isna_overload(df): def _impl(df): - return hpat.hiframes.pd_dataframe_ext.isna_dummy(df) + return sdc.hiframes.pd_dataframe_ext.isna_dummy(df) return _impl @@ -1178,7 +1178,7 @@ def lower_isna_dummy(context, builder, sig, args): def astype_overload(df, dtype, copy=True, errors='raise'): def _impl(df, dtype, copy=True, errors='raise'): - return hpat.hiframes.pd_dataframe_ext.astype_dummy(df, dtype, copy, errors) + return sdc.hiframes.pd_dataframe_ext.astype_dummy(df, dtype, copy, errors) return _impl @@ -1213,7 +1213,7 @@ def lower_astype_dummy(context, builder, sig, args): def isin_overload(df, values): def _impl(df, values): - return hpat.hiframes.pd_dataframe_ext.isin_dummy(df, values) + return sdc.hiframes.pd_dataframe_ext.isin_dummy(df, values) return _impl @@ -1262,7 +1262,7 @@ def pct_change_overload(df, periods=1, fill_method='pad', limit=None, freq=None) # TODO: kwargs # TODO: avoid dummy and generate func here when inlining is possible def _impl(df, periods=1, fill_method='pad', limit=None, freq=None): - return hpat.hiframes.pd_dataframe_ext.pct_change_dummy(df, periods) + return sdc.hiframes.pd_dataframe_ext.pct_change_dummy(df, periods) return 
_impl @@ -1294,7 +1294,7 @@ def mean_overload(df, axis=None, skipna=None, level=None, numeric_only=None): # TODO: kwargs # TODO: avoid dummy and generate func here when inlining is possible def _impl(df, axis=None, skipna=None, level=None, numeric_only=None): - return hpat.hiframes.pd_dataframe_ext.mean_dummy(df) + return sdc.hiframes.pd_dataframe_ext.mean_dummy(df) return _impl @@ -1327,7 +1327,7 @@ def median_overload(df, axis=None, skipna=None, level=None, numeric_only=None): # TODO: kwargs # TODO: avoid dummy and generate func here when inlining is possible def _impl(df, axis=None, skipna=None, level=None, numeric_only=None): - return hpat.hiframes.pd_dataframe_ext.median_dummy(df) + return sdc.hiframes.pd_dataframe_ext.median_dummy(df) return _impl @@ -1361,7 +1361,7 @@ def std_overload(df, axis=None, skipna=None, level=None, ddof=1, numeric_only=No # TODO: avoid dummy and generate func here when inlining is possible # TODO: support ddof def _impl(df, axis=None, skipna=None, level=None, ddof=1, numeric_only=None): - return hpat.hiframes.pd_dataframe_ext.std_dummy(df) + return sdc.hiframes.pd_dataframe_ext.std_dummy(df) return _impl @@ -1395,7 +1395,7 @@ def var_overload(df, axis=None, skipna=None, level=None, ddof=1, numeric_only=No # TODO: avoid dummy and generate func here when inlining is possible # TODO: support ddof def _impl(df, axis=None, skipna=None, level=None, ddof=1, numeric_only=None): - return hpat.hiframes.pd_dataframe_ext.var_dummy(df) + return sdc.hiframes.pd_dataframe_ext.var_dummy(df) return _impl @@ -1428,7 +1428,7 @@ def max_overload(df, axis=None, skipna=None, level=None, numeric_only=None): # TODO: kwargs # TODO: avoid dummy and generate func here when inlining is possible def _impl(df, axis=None, skipna=None, level=None, numeric_only=None): - return hpat.hiframes.pd_dataframe_ext.max_dummy(df) + return sdc.hiframes.pd_dataframe_ext.max_dummy(df) return _impl @@ -1461,7 +1461,7 @@ def min_overload(df, axis=None, skipna=None, level=None, 
numeric_only=None): # TODO: kwargs # TODO: avoid dummy and generate func here when inlining is possible def _impl(df, axis=None, skipna=None, level=None, numeric_only=None): - return hpat.hiframes.pd_dataframe_ext.min_dummy(df) + return sdc.hiframes.pd_dataframe_ext.min_dummy(df) return _impl @@ -1496,7 +1496,7 @@ def sum_overload(df, axis=None, skipna=None, level=None, numeric_only=None, # TODO: avoid dummy and generate func here when inlining is possible def _impl(df, axis=None, skipna=None, level=None, numeric_only=None, min_count=0): - return hpat.hiframes.pd_dataframe_ext.sum_dummy(df) + return sdc.hiframes.pd_dataframe_ext.sum_dummy(df) return _impl @@ -1536,7 +1536,7 @@ def prod_overload(df, axis=None, skipna=None, level=None, numeric_only=None, # TODO: avoid dummy and generate func here when inlining is possible def _impl(df, axis=None, skipna=None, level=None, numeric_only=None, min_count=0): - return hpat.hiframes.pd_dataframe_ext.prod_dummy(df) + return sdc.hiframes.pd_dataframe_ext.prod_dummy(df) return _impl @@ -1573,7 +1573,7 @@ def lower_prod_dummy(context, builder, sig, args): def count_overload(df, axis=0, level=None, numeric_only=False): # TODO: avoid dummy and generate func here when inlining is possible def _impl(df, axis=0, level=None, numeric_only=False): - return hpat.hiframes.pd_dataframe_ext.count_dummy(df) + return sdc.hiframes.pd_dataframe_ext.count_dummy(df) return _impl diff --git a/hpat/hiframes/pd_groupby_ext.py b/sdc/hiframes/pd_groupby_ext.py similarity index 95% rename from hpat/hiframes/pd_groupby_ext.py rename to sdc/hiframes/pd_groupby_ext.py index 03fdac85d..d726d294e 100644 --- a/hpat/hiframes/pd_groupby_ext.py +++ b/sdc/hiframes/pd_groupby_ext.py @@ -36,12 +36,12 @@ from numba.typing.templates import (infer_global, AbstractTemplate, signature, AttributeTemplate, bound_function) from numba.targets.imputils import impl_ret_new_ref, impl_ret_borrowed -import hpat -from hpat.hiframes.pd_series_ext import (SeriesType, 
_get_series_array_type, +import sdc +from sdc.hiframes.pd_series_ext import (SeriesType, _get_series_array_type, arr_to_series_type) -from hpat.str_ext import string_type -from hpat.hiframes.pd_dataframe_ext import DataFrameType -from hpat.hiframes.aggregate import get_agg_func +from sdc.str_ext import string_type +from sdc.hiframes.pd_dataframe_ext import DataFrameType +from sdc.hiframes.aggregate import get_agg_func class DataFrameGroupByType(types.Type): # TODO: IterableType over groups @@ -81,7 +81,7 @@ def df_groupby_overload(df, by=None, axis=0, level=None, as_index=True, def _impl(df, by=None, axis=0, level=None, as_index=True, sort=True, group_keys=True, squeeze=False, observed=False): - return hpat.hiframes.pd_groupby_ext.groupby_dummy(df, by, as_index) + return sdc.hiframes.pd_groupby_ext.groupby_dummy(df, by, as_index) return _impl @@ -106,7 +106,7 @@ def generic(self, args, kws): for k in keys: selection.remove(k) - if isinstance(as_index, hpat.utils.BooleanLiteral): + if isinstance(as_index, sdc.utils.BooleanLiteral): as_index = as_index.literal_value else: # XXX as_index type is just bool when value not passed. 
Therefore, @@ -152,7 +152,7 @@ class DataframeGroupByAttribute(AttributeTemplate): def _get_agg_typ(self, grp, args, code): f_ir = numba.ir_utils.get_ir_of_code( - {'np': np, 'numba': numba, 'hpat': hpat}, code) + {'np': np, 'numba': numba, 'sdc': sdc}, code) out_data = [] out_columns = [] # add key columns of not as_index @@ -254,7 +254,7 @@ def generic(self, args, kws): data = df.data[df.columns.index(values)] func = get_agg_func(None, aggfunc.literal_value, None) f_ir = numba.ir_utils.get_ir_of_code( - {'np': np, 'numba': numba, 'hpat': hpat}, func.__code__) + {'np': np, 'numba': numba, 'sdc': sdc}, func.__code__) _, out_dtype, _ = numba.typed_passes.type_inference_stage( self.context, f_ir, (data,), None) out_arr_typ = _get_series_array_type(out_dtype) diff --git a/hpat/hiframes/pd_index_ext.py b/sdc/hiframes/pd_index_ext.py similarity index 92% rename from hpat/hiframes/pd_index_ext.py rename to sdc/hiframes/pd_index_ext.py index ec35dd320..6e7e135cf 100644 --- a/hpat/hiframes/pd_index_ext.py +++ b/sdc/hiframes/pd_index_ext.py @@ -36,13 +36,13 @@ AttributeTemplate, bound_function) from numba.targets.boxing import box_array -import hpat -from hpat.str_ext import string_type -import hpat.hiframes -from hpat.hiframes.pd_series_ext import (is_str_series_typ, string_array_type, +import sdc +from sdc.str_ext import string_type +import sdc.hiframes +from sdc.hiframes.pd_series_ext import (is_str_series_typ, string_array_type, SeriesType) -from hpat.hiframes.pd_timestamp_ext import pandas_timestamp_type, datetime_date_type -from hpat.hiframes.datetime_date_ext import array_datetime_date +from sdc.hiframes.pd_timestamp_ext import pandas_timestamp_type, datetime_date_type +from sdc.hiframes.datetime_date_ext import array_datetime_date _dt_index_data_typ = types.Array(types.NPDatetime('ns'), 1, 'C') _timedelta_index_data_typ = types.Array(types.NPTimedelta('ns'), 1, 'C') @@ -145,7 +145,7 @@ def resolve_date_field(self, ary): return SeriesType(types.int64) -for field in 
hpat.hiframes.pd_timestamp_ext.date_fields: +for field in sdc.hiframes.pd_timestamp_ext.date_fields: setattr(DatetimeIndexAttribute, "resolve_" + field, resolve_date_field) @@ -162,15 +162,15 @@ def pd_datetimeindex_overload(data=None, freq=None, start=None, end=None, return (lambda data=None, freq=None, start=None, end=None, periods=None, tz=None, normalize=False, closed=None, ambiguous='raise', dayfirst=False, yearfirst=False, dtype=None, copy=False, name=None, - verify_integrity=True: hpat.hiframes.api.init_datetime_index( - hpat.hiframes.api.ts_series_to_arr_typ(data), name)) + verify_integrity=True: sdc.hiframes.api.init_datetime_index( + sdc.hiframes.api.ts_series_to_arr_typ(data), name)) def f(data=None, freq=None, start=None, end=None, periods=None, tz=None, normalize=False, closed=None, ambiguous='raise', dayfirst=False, yearfirst=False, dtype=None, copy=False, name=None, verify_integrity=True): - S = hpat.hiframes.api.parse_datetimes_from_strings(data) - return hpat.hiframes.api.init_datetime_index(S, name) + S = sdc.hiframes.api.parse_datetimes_from_strings(data) + return sdc.hiframes.api.init_datetime_index(S, name) return f @@ -243,5 +243,5 @@ def resolve_timedelta_field(self, ary): return types.Array(types.int64, 1, 'C') -for field in hpat.hiframes.pd_timestamp_ext.timedelta_fields: +for field in sdc.hiframes.pd_timestamp_ext.timedelta_fields: setattr(TimedeltaIndexAttribute, "resolve_" + field, resolve_timedelta_field) diff --git a/hpat/hiframes/pd_rolling_ext.py b/sdc/hiframes/pd_rolling_ext.py similarity index 96% rename from hpat/hiframes/pd_rolling_ext.py rename to sdc/hiframes/pd_rolling_ext.py index a088f9c1a..fa717fe62 100644 --- a/hpat/hiframes/pd_rolling_ext.py +++ b/sdc/hiframes/pd_rolling_ext.py @@ -36,12 +36,12 @@ from numba.typing.templates import (infer_global, AbstractTemplate, signature, AttributeTemplate, bound_function) from numba.targets.imputils import impl_ret_new_ref, impl_ret_borrowed -import hpat -from 
hpat.hiframes.pd_series_ext import (SeriesType, _get_series_array_type, +import sdc +from sdc.hiframes.pd_series_ext import (SeriesType, _get_series_array_type, arr_to_series_type) -from hpat.str_ext import string_type -from hpat.hiframes.pd_dataframe_ext import DataFrameType -from hpat.hiframes.rolling import supported_rolling_funcs +from sdc.str_ext import string_type +from sdc.hiframes.pd_dataframe_ext import DataFrameType +from sdc.hiframes.rolling import supported_rolling_funcs class RollingType(types.Type): @@ -78,7 +78,7 @@ def df_rolling_overload(df, window, min_periods=None, center=False, def _impl(df, window, min_periods=None, center=False, win_type=None, on=None, axis=0, closed=None): - return hpat.hiframes.pd_rolling_ext.rolling_dummy( + return sdc.hiframes.pd_rolling_ext.rolling_dummy( df, window, center, on) return _impl diff --git a/hpat/hiframes/pd_series_ext.py b/sdc/hiframes/pd_series_ext.py similarity index 97% rename from hpat/hiframes/pd_series_ext.py rename to sdc/hiframes/pd_series_ext.py index abeff1e8d..5e2f23e97 100644 --- a/hpat/hiframes/pd_series_ext.py +++ b/sdc/hiframes/pd_series_ext.py @@ -53,13 +53,13 @@ from numba.targets.imputils import (impl_ret_new_ref, iternext_impl, RefType) from numba.targets.arrayobj import (make_array, _getitem_array1d) -import hpat -from hpat.datatypes.hpat_pandas_stringmethods_types import StringMethodsType -from hpat.hiframes.pd_categorical_ext import (PDCategoricalDtype, CategoricalArray) -from hpat.hiframes.pd_timestamp_ext import (pandas_timestamp_type, datetime_date_type) -from hpat.hiframes.rolling import supported_rolling_funcs -from hpat.hiframes.split_impl import (string_array_split_view_type, GetItemStringArraySplitView) -from hpat.str_arr_ext import ( +import sdc +from sdc.datatypes.hpat_pandas_stringmethods_types import StringMethodsType +from sdc.hiframes.pd_categorical_ext import (PDCategoricalDtype, CategoricalArray) +from sdc.hiframes.pd_timestamp_ext import (pandas_timestamp_type, 
datetime_date_type) +from sdc.hiframes.rolling import supported_rolling_funcs +from sdc.hiframes.split_impl import (string_array_split_view_type, GetItemStringArraySplitView) +from sdc.str_arr_ext import ( string_array_type, iternext_str_array, offset_typ, @@ -67,7 +67,7 @@ str_arr_payload_type, StringArrayType, GetItemStringArray) -from hpat.str_ext import string_type, list_string_array_type +from sdc.str_ext import string_type, list_string_array_type class SeriesType(types.IterableType): @@ -761,7 +761,7 @@ def resolve_rename(self, ary, args, kws): """ Functions which are used from Numba directly by calling from StringMethodsType - Test: HPAT_CONFIG_PIPELINE_HPAT=0 python -m hpat.runtests hpat.tests.test_series.TestSeries.test_series_str2str + Test: HPAT_CONFIG_PIPELINE_HPAT=0 python -m sdc.runtests sdc.tests.test_series.TestSeries.test_series_str2str """ # class SeriesStrMethodType(types.Type): @@ -803,7 +803,7 @@ def resolve_get(self, ary, args, kws): return signature(SeriesType(string_type), *args) def generic_resolve(self, s_str, func_name): - if hpat.config.config_pipeline_hpat_default and func_name in str2str_methods: + if sdc.config.config_pipeline_hpat_default and func_name in str2str_methods: template_key = 'strmethod.' 
+ func_name out_typ = SeriesType(string_type) @@ -843,7 +843,7 @@ def resolve_date_field(self, ary): return SeriesType(types.int64) -for field in hpat.hiframes.pd_timestamp_ext.date_fields: +for field in sdc.hiframes.pd_timestamp_ext.date_fields: setattr(SeriesDtMethodAttribute, "resolve_" + field, resolve_date_field) @@ -922,7 +922,7 @@ class SeriesCompEqual(AbstractTemplate): key = '==' def generic(self, args, kws): - from hpat.str_arr_ext import is_str_arr_typ + from sdc.str_arr_ext import is_str_arr_typ assert not kws [va, vb] = args # if one of the inputs is string array @@ -1010,7 +1010,7 @@ def generic_expand_cumulative_series(self, args, kws): 'resolve_prod', 'resolve_count', 'resolve_dropna', 'resolve_fillna', 'resolve_astype'] # disable using of some Array attributes in non-hpat pipeline only -if not hpat.config.config_pipeline_hpat_default: +if not sdc.config.config_pipeline_hpat_default: for attr in ['resolve_std', 'resolve_var']: _not_series_array_attrs.append(attr) @@ -1024,7 +1024,7 @@ def generic_expand_cumulative_series(self, args, kws): setattr(SeriesAttribute, attr, func) # remove some attributes from SeriesAttribute for non-hpat pipeline -if not hpat.config.config_pipeline_hpat_default: +if not sdc.config.config_pipeline_hpat_default: for attr in _non_hpat_pipeline_attrs: if attr in SeriesAttribute.__dict__: delattr(SeriesAttribute, attr) @@ -1278,9 +1278,9 @@ def typer(val1, val2): if is_dt64_series_typ(val1) and val2 == pandas_timestamp_type: return SeriesType(types.NPTimedelta('ns')) - from hpat.hiframes.pd_index_ext import DatetimeIndexType + from sdc.hiframes.pd_index_ext import DatetimeIndexType if isinstance(val1, DatetimeIndexType) and val2 == pandas_timestamp_type: - from hpat.hiframes.pd_index_ext import TimedeltaIndexType + from sdc.hiframes.pd_index_ext import TimedeltaIndexType return TimedeltaIndexType(False) return typer @@ -1299,12 +1299,12 @@ def hpat_pandas_series_ctor_impl(data=None, index=None, dtype=None, name=None, c '''' 
use binop here as otherwise Numba's dead branch pruning doesn't work TODO: replace with 'if not is_index_none' when resolved ''' if is_index_none == False: # noqa - fix_index = hpat.hiframes.api.fix_df_array(index) + fix_index = sdc.hiframes.api.fix_df_array(index) else: fix_index = index - return hpat.hiframes.api.init_series(hpat.hiframes.api.fix_df_array(data), fix_index, name) + return sdc.hiframes.api.init_series(sdc.hiframes.api.fix_df_array(data), fix_index, name) return hpat_pandas_series_ctor_impl -from hpat.datatypes.hpat_pandas_series_functions import * +from sdc.datatypes.hpat_pandas_series_functions import * diff --git a/hpat/hiframes/pd_timestamp_ext.py b/sdc/hiframes/pd_timestamp_ext.py similarity index 99% rename from hpat/hiframes/pd_timestamp_ext.py rename to sdc/hiframes/pd_timestamp_ext.py index 34dca3598..6b4146042 100644 --- a/hpat/hiframes/pd_timestamp_ext.py +++ b/sdc/hiframes/pd_timestamp_ext.py @@ -41,8 +41,8 @@ import numpy as np import ctypes import inspect -import hpat.str_ext -import hpat.utils +import sdc.str_ext +import sdc.utils from llvmlite import ir as lir @@ -291,7 +291,7 @@ def int_array_to_datetime_date(ia): def box_datetime_date_array(typ, val, c): ary = box_array(types.Array(types.int64, 1, 'C'), val, c) - hpat_name = c.context.insert_const_string(c.builder.module, 'hpat') + hpat_name = c.context.insert_const_string(c.builder.module, 'sdc') hpat_mod = c.pyapi.import_module_noblock(hpat_name) hi_mod = c.pyapi.object_getattr_string(hpat_mod, 'hiframes') pte_mod = c.pyapi.object_getattr_string(hi_mod, 'pd_timestamp_ext') @@ -959,7 +959,7 @@ def impl_myref_pandas_dts_type(context, builder, sig, args): @numba.njit(locals={'arg1': numba.int32, 'arg3': numba.int32, 'arg4': numba.int32}) def parse_datetime_str(str): - arg0 = hpat.str_ext.unicode_to_char_ptr(str) + arg0 = sdc.str_ext.unicode_to_char_ptr(str) arg1 = len(str) arg2 = PANDAS_DATETIMESTRUCT() arg3 = np.int32(13) diff --git a/hpat/hiframes/rolling.py 
b/sdc/hiframes/rolling.py similarity index 89% rename from hpat/hiframes/rolling.py rename to sdc/hiframes/rolling.py index 6853df415..4b5cb4e4a 100644 --- a/hpat/hiframes/rolling.py +++ b/sdc/hiframes/rolling.py @@ -27,7 +27,7 @@ import numpy as np import pandas as pd -import hpat +import sdc import numba from numba import types from numba.extending import lower_builtin, overload @@ -36,9 +36,9 @@ from numba.typing.templates import infer_global, AbstractTemplate from numba.ir_utils import guard, find_const -from hpat.distributed_api import Reduce_Type -from hpat.hiframes.pd_timestamp_ext import integer_to_dt64 -from hpat.utils import unliteral_all +from sdc.distributed_api import Reduce_Type +from sdc.hiframes.pd_timestamp_ext import integer_to_dt64 +from sdc.utils import unliteral_all supported_rolling_funcs = ('sum', 'mean', 'var', 'std', 'count', 'median', @@ -111,7 +111,7 @@ def generic(self, args, kws): # result is always float64 in pandas # see _prep_values() in window.py f_type = args[4] - from hpat.hiframes.pd_series_ext import if_series_to_array_type + from sdc.hiframes.pd_series_ext import if_series_to_array_type ret_typ = if_series_to_array_type(arr).copy(dtype=types.float64) return signature(ret_typ, arr, types.intp, types.bool_, types.bool_, f_type) @@ -125,7 +125,7 @@ def generic(self, args, kws): # result is always float64 in pandas # see _prep_values() in window.py f_type = args[5] - from hpat.hiframes.pd_series_ext import if_series_to_array_type + from sdc.hiframes.pd_series_ext import if_series_to_array_type ret_typ = if_series_to_array_type(arr).copy(dtype=types.float64) return signature(ret_typ, arr, on_arr, types.intp, types.bool_, types.bool_, f_type) @@ -137,7 +137,7 @@ class RollingCovType(AbstractTemplate): def generic(self, args, kws): arr = args[0] # array or series # hiframes_typed pass replaces series input with array after typing - from hpat.hiframes.pd_series_ext import if_series_to_array_type + from sdc.hiframes.pd_series_ext import 
if_series_to_array_type ret_typ = if_series_to_array_type(arr).copy(dtype=types.float64) return signature(ret_typ, *unliteral_all(args)) @@ -253,8 +253,8 @@ def func(a, o, w, c, p, f): @numba.njit def roll_fixed_linear_generic(in_arr, win, center, parallel, init_data, add_obs, remove_obs, calc_out): # pragma: no cover - rank = hpat.distributed_api.get_rank() - n_pes = hpat.distributed_api.get_size() + rank = sdc.distributed_api.get_rank() + n_pes = sdc.distributed_api.get_size() N = len(in_arr) # TODO: support minp arg end_range etc. minp = win @@ -280,7 +280,7 @@ def roll_fixed_linear_generic(in_arr, win, center, parallel, init_data, # recv right if center and rank != n_pes - 1: - hpat.distributed_api.wait(r_recv_req, True) + sdc.distributed_api.wait(r_recv_req, True) for i in range(0, halo_size): data = add_obs(r_recv_buff[i], *data) @@ -292,7 +292,7 @@ def roll_fixed_linear_generic(in_arr, win, center, parallel, init_data, # recv left if rank != 0: - hpat.distributed_api.wait(l_recv_req, True) + sdc.distributed_api.wait(l_recv_req, True) data = init_data() for i in range(0, halo_size): data = add_obs(l_recv_buff[i], *data) @@ -352,8 +352,8 @@ def roll_fixed_linear_generic_seq(in_arr, win, center, init_data, add_obs, @numba.njit def roll_fixed_apply(in_arr, win, center, parallel, kernel_func): # pragma: no cover - rank = hpat.distributed_api.get_rank() - n_pes = hpat.distributed_api.get_size() + rank = sdc.distributed_api.get_rank() + n_pes = sdc.distributed_api.get_size() N = len(in_arr) # TODO: support minp arg end_range etc. 
minp = win @@ -378,7 +378,7 @@ def roll_fixed_apply(in_arr, win, center, parallel, kernel_func): # pragma: no # recv right if center and rank != n_pes - 1: - hpat.distributed_api.wait(r_recv_req, True) + sdc.distributed_api.wait(r_recv_req, True) border_data = np.concatenate((in_arr[N - win + 1:], r_recv_buff)) ind = 0 for i in range(max(N - offset, 0), N): @@ -387,7 +387,7 @@ def roll_fixed_apply(in_arr, win, center, parallel, kernel_func): # pragma: no # recv left if rank != 0: - hpat.distributed_api.wait(l_recv_req, True) + sdc.distributed_api.wait(l_recv_req, True) border_data = np.concatenate((l_recv_buff, in_arr[:win - 1])) for i in range(0, win - offset - 1): output[i] = kernel_func(border_data[i:i + win]) @@ -423,8 +423,8 @@ def roll_fixed_apply_seq(in_arr, win, center, kernel_func): # pragma: no cover @numba.njit def roll_var_linear_generic(in_arr, on_arr_dt, win, center, parallel, init_data, add_obs, remove_obs, calc_out): # pragma: no cover - rank = hpat.distributed_api.get_rank() - n_pes = hpat.distributed_api.get_size() + rank = sdc.distributed_api.get_rank() + n_pes = sdc.distributed_api.get_size() on_arr = cast_dt64_arr_to_int(on_arr_dt) N = len(in_arr) # TODO: support minp arg end_range etc. 
@@ -453,8 +453,8 @@ def roll_var_linear_generic(in_arr, on_arr_dt, win, center, parallel, init_data, # recv left if rank != 0: - hpat.distributed_api.wait(l_recv_req, True) - hpat.distributed_api.wait(l_recv_t_req, True) + sdc.distributed_api.wait(l_recv_req, True) + sdc.distributed_api.wait(l_recv_t_req, True) # values with start == 0 could potentially have left halo starts num_zero_starts = 0 @@ -561,8 +561,8 @@ def roll_var_linear_generic_seq(in_arr, on_arr, win, start, end, init_data, @numba.njit def roll_variable_apply(in_arr, on_arr_dt, win, center, parallel, kernel_func): # pragma: no cover - rank = hpat.distributed_api.get_rank() - n_pes = hpat.distributed_api.get_size() + rank = sdc.distributed_api.get_rank() + n_pes = sdc.distributed_api.get_size() on_arr = cast_dt64_arr_to_int(on_arr_dt) N = len(in_arr) # TODO: support minp arg end_range etc. @@ -590,8 +590,8 @@ def roll_variable_apply(in_arr, on_arr_dt, win, center, parallel, kernel_func): # recv left if rank != 0: - hpat.distributed_api.wait(l_recv_req, True) - hpat.distributed_api.wait(l_recv_t_req, True) + sdc.distributed_api.wait(l_recv_req, True) + sdc.distributed_api.wait(l_recv_t_req, True) # values with start == 0 could potentially have left halo starts num_zero_starts = 0 @@ -862,8 +862,8 @@ def shift_overload(in_arr, shift, parallel): def shift_impl(in_arr, shift, parallel): # pragma: no cover N = len(in_arr) if parallel: - rank = hpat.distributed_api.get_rank() - n_pes = hpat.distributed_api.get_size() + rank = sdc.distributed_api.get_rank() + n_pes = sdc.distributed_api.get_size() halo_size = np.int32(shift) if _is_small_for_parallel(N, halo_size): return _handle_small_data_shift(in_arr, shift, rank, n_pes) @@ -880,7 +880,7 @@ def shift_impl(in_arr, shift, parallel): # pragma: no cover # recv left if rank != 0: - hpat.distributed_api.wait(l_recv_req, True) + sdc.distributed_api.wait(l_recv_req, True) for i in range(0, halo_size): output[i] = l_recv_buff[i] @@ -891,7 +891,7 @@ def 
shift_impl(in_arr, shift, parallel): # pragma: no cover @numba.njit def shift_seq(in_arr, shift): # pragma: no cover N = len(in_arr) - output = hpat.hiframes.api.alloc_shift(in_arr) + output = sdc.hiframes.api.alloc_shift(in_arr) shift = min(shift, N) output[:shift] = np.nan @@ -918,8 +918,8 @@ def pct_change_overload(in_arr, shift, parallel): def pct_change_impl(in_arr, shift, parallel): # pragma: no cover N = len(in_arr) if parallel: - rank = hpat.distributed_api.get_rank() - n_pes = hpat.distributed_api.get_size() + rank = sdc.distributed_api.get_rank() + n_pes = sdc.distributed_api.get_size() halo_size = np.int32(shift) if _is_small_for_parallel(N, halo_size): return _handle_small_data_pct_change(in_arr, shift, rank, n_pes) @@ -936,7 +936,7 @@ def pct_change_impl(in_arr, shift, parallel): # pragma: no cover # recv left if rank != 0: - hpat.distributed_api.wait(l_recv_req, True) + sdc.distributed_api.wait(l_recv_req, True) for i in range(0, halo_size): prev = l_recv_buff[i] @@ -948,7 +948,7 @@ def pct_change_impl(in_arr, shift, parallel): # pragma: no cover @numba.njit def pct_change_seq(in_arr, shift): # pragma: no cover N = len(in_arr) - output = hpat.hiframes.api.alloc_shift(in_arr) + output = sdc.hiframes.api.alloc_shift(in_arr) shift = min(shift, N) output[:shift] = np.nan @@ -969,17 +969,17 @@ def _border_icomm(in_arr, rank, n_pes, halo_size, dtype, center): # pragma: no r_recv_buff = np.empty(halo_size, dtype) # send right if rank != n_pes - 1: - r_send_req = hpat.distributed_api.isend(in_arr[-halo_size:], halo_size, np.int32(rank + 1), comm_tag, True) + r_send_req = sdc.distributed_api.isend(in_arr[-halo_size:], halo_size, np.int32(rank + 1), comm_tag, True) # recv left if rank != 0: - l_recv_req = hpat.distributed_api.irecv(l_recv_buff, halo_size, np.int32(rank - 1), comm_tag, True) + l_recv_req = sdc.distributed_api.irecv(l_recv_buff, halo_size, np.int32(rank - 1), comm_tag, True) # center cases # send left if center and rank != 0: - l_send_req = 
hpat.distributed_api.isend(in_arr[:halo_size], halo_size, np.int32(rank - 1), comm_tag, True) + l_send_req = sdc.distributed_api.isend(in_arr[:halo_size], halo_size, np.int32(rank - 1), comm_tag, True) # recv right if center and rank != n_pes - 1: - r_recv_req = hpat.distributed_api.irecv(r_recv_buff, halo_size, np.int32(rank + 1), comm_tag, True) + r_recv_req = sdc.distributed_api.irecv(r_recv_buff, halo_size, np.int32(rank + 1), comm_tag, True) return l_recv_buff, r_recv_buff, l_send_req, r_send_req, l_recv_req, r_recv_req @@ -999,18 +999,18 @@ def _border_icomm_var(in_arr, on_arr, rank, n_pes, win_size, dtype): # pragma: # send right if rank != n_pes - 1: - hpat.distributed_api.send(halo_size, np.int32(rank + 1), comm_tag) - r_send_req = hpat.distributed_api.isend( + sdc.distributed_api.send(halo_size, np.int32(rank + 1), comm_tag) + r_send_req = sdc.distributed_api.isend( in_arr[-halo_size:], np.int32(halo_size), np.int32(rank + 1), comm_tag, True) - r_send_t_req = hpat.distributed_api.isend( + r_send_t_req = sdc.distributed_api.isend( on_arr[-halo_size:], np.int32(halo_size), np.int32(rank + 1), comm_tag, True) # recv left if rank != 0: - halo_size = hpat.distributed_api.recv(np.int64, np.int32(rank - 1), comm_tag) + halo_size = sdc.distributed_api.recv(np.int64, np.int32(rank - 1), comm_tag) l_recv_buff = np.empty(halo_size, dtype) - l_recv_req = hpat.distributed_api.irecv(l_recv_buff, np.int32(halo_size), np.int32(rank - 1), comm_tag, True) + l_recv_req = sdc.distributed_api.irecv(l_recv_buff, np.int32(halo_size), np.int32(rank - 1), comm_tag, True) l_recv_t_buff = np.empty(halo_size, np.int64) - l_recv_t_req = hpat.distributed_api.irecv( + l_recv_t_req = sdc.distributed_api.irecv( l_recv_t_buff, np.int32(halo_size), np.int32( rank - 1), comm_tag, True) @@ -1021,10 +1021,10 @@ def _border_icomm_var(in_arr, on_arr, rank, n_pes, win_size, dtype): # pragma: def _border_send_wait(r_send_req, l_send_req, rank, n_pes, center): # pragma: no cover # wait on send 
right if rank != n_pes - 1: - hpat.distributed_api.wait(r_send_req, True) + sdc.distributed_api.wait(r_send_req, True) # wait on send left if center and rank != 0: - hpat.distributed_api.wait(l_send_req, True) + sdc.distributed_api.wait(l_send_req, True) @numba.njit @@ -1036,7 +1036,7 @@ def _is_small_for_parallel(N, halo_size): # pragma: no cover # TODO: avoid reduce for obvious cases like no center and large 1D_Block # using 2*halo_size+1 to accomodate center cases with data on more than # 2 processor - num_small = hpat.distributed_api.dist_reduce( + num_small = sdc.distributed_api.dist_reduce( int(N <= 2 * halo_size + 1), np.int32(Reduce_Type.Sum.value)) return num_small != 0 @@ -1044,63 +1044,63 @@ def _is_small_for_parallel(N, halo_size): # pragma: no cover @numba.njit def _handle_small_data(in_arr, win, center, rank, n_pes, init_data, add_obs, remove_obs, calc_out): # pragma: no cover - all_N = hpat.distributed_api.dist_reduce( + all_N = sdc.distributed_api.dist_reduce( len(in_arr), np.int32(Reduce_Type.Sum.value)) - all_in_arr = hpat.distributed_api.gatherv(in_arr) + all_in_arr = sdc.distributed_api.gatherv(in_arr) if rank == 0: all_out, _ = roll_fixed_linear_generic_seq(all_in_arr, win, center, init_data, add_obs, remove_obs, calc_out) else: all_out = np.empty(all_N, np.float64) - hpat.distributed_api.bcast(all_out) - start = hpat.distributed_api.get_start(all_N, n_pes, rank) - end = hpat.distributed_api.get_end(all_N, n_pes, rank) + sdc.distributed_api.bcast(all_out) + start = sdc.distributed_api.get_start(all_N, n_pes, rank) + end = sdc.distributed_api.get_end(all_N, n_pes, rank) return all_out[start:end] @numba.njit def _handle_small_data_apply(in_arr, win, center, rank, n_pes, kernel_func): # pragma: no cover - all_N = hpat.distributed_api.dist_reduce( + all_N = sdc.distributed_api.dist_reduce( len(in_arr), np.int32(Reduce_Type.Sum.value)) - all_in_arr = hpat.distributed_api.gatherv(in_arr) + all_in_arr = sdc.distributed_api.gatherv(in_arr) if rank == 
0: all_out = roll_fixed_apply_seq(all_in_arr, win, center, kernel_func) else: all_out = np.empty(all_N, np.float64) - hpat.distributed_api.bcast(all_out) - start = hpat.distributed_api.get_start(all_N, n_pes, rank) - end = hpat.distributed_api.get_end(all_N, n_pes, rank) + sdc.distributed_api.bcast(all_out) + start = sdc.distributed_api.get_start(all_N, n_pes, rank) + end = sdc.distributed_api.get_end(all_N, n_pes, rank) return all_out[start:end] @numba.njit def _handle_small_data_shift(in_arr, shift, rank, n_pes): # pragma: no cover - all_N = hpat.distributed_api.dist_reduce( + all_N = sdc.distributed_api.dist_reduce( len(in_arr), np.int32(Reduce_Type.Sum.value)) - all_in_arr = hpat.distributed_api.gatherv(in_arr) + all_in_arr = sdc.distributed_api.gatherv(in_arr) if rank == 0: all_out = shift_seq(all_in_arr, shift) else: all_out = np.empty(all_N, np.float64) - hpat.distributed_api.bcast(all_out) - start = hpat.distributed_api.get_start(all_N, n_pes, rank) - end = hpat.distributed_api.get_end(all_N, n_pes, rank) + sdc.distributed_api.bcast(all_out) + start = sdc.distributed_api.get_start(all_N, n_pes, rank) + end = sdc.distributed_api.get_end(all_N, n_pes, rank) return all_out[start:end] @numba.njit def _handle_small_data_pct_change(in_arr, shift, rank, n_pes): # pragma: no cover - all_N = hpat.distributed_api.dist_reduce( + all_N = sdc.distributed_api.dist_reduce( len(in_arr), np.int32(Reduce_Type.Sum.value)) - all_in_arr = hpat.distributed_api.gatherv(in_arr) + all_in_arr = sdc.distributed_api.gatherv(in_arr) if rank == 0: all_out = pct_change_seq(all_in_arr, shift) else: all_out = np.empty(all_N, np.float64) - hpat.distributed_api.bcast(all_out) - start = hpat.distributed_api.get_start(all_N, n_pes, rank) - end = hpat.distributed_api.get_end(all_N, n_pes, rank) + sdc.distributed_api.bcast(all_out) + start = sdc.distributed_api.get_start(all_N, n_pes, rank) + end = sdc.distributed_api.get_end(all_N, n_pes, rank) return all_out[start:end] @@ -1136,7 +1136,7 @@ 
def _is_small_for_parallel_variable(on_arr, win_size): # pragma: no cover start = on_arr[0] end = on_arr[-1] pe_range = end - start - num_small = hpat.distributed_api.dist_reduce( + num_small = sdc.distributed_api.dist_reduce( int(pe_range <= win_size), np.int32(Reduce_Type.Sum.value)) return num_small != 0 @@ -1144,38 +1144,38 @@ def _is_small_for_parallel_variable(on_arr, win_size): # pragma: no cover @numba.njit def _handle_small_data_variable(in_arr, on_arr, win, rank, n_pes, init_data, add_obs, remove_obs, calc_out): # pragma: no cover - all_N = hpat.distributed_api.dist_reduce( + all_N = sdc.distributed_api.dist_reduce( len(in_arr), np.int32(Reduce_Type.Sum.value)) - all_in_arr = hpat.distributed_api.gatherv(in_arr) - all_on_arr = hpat.distributed_api.gatherv(on_arr) + all_in_arr = sdc.distributed_api.gatherv(in_arr) + all_on_arr = sdc.distributed_api.gatherv(on_arr) if rank == 0: start, end = _build_indexer(all_on_arr, all_N, win, False, True) all_out = roll_var_linear_generic_seq(all_in_arr, all_on_arr, win, start, end, init_data, add_obs, remove_obs, calc_out) else: all_out = np.empty(all_N, np.float64) - hpat.distributed_api.bcast(all_out) - start = hpat.distributed_api.get_start(all_N, n_pes, rank) - end = hpat.distributed_api.get_end(all_N, n_pes, rank) + sdc.distributed_api.bcast(all_out) + start = sdc.distributed_api.get_start(all_N, n_pes, rank) + end = sdc.distributed_api.get_end(all_N, n_pes, rank) return all_out[start:end] @numba.njit def _handle_small_data_variable_apply(in_arr, on_arr, win, rank, n_pes, kernel_func): # pragma: no cover - all_N = hpat.distributed_api.dist_reduce( + all_N = sdc.distributed_api.dist_reduce( len(in_arr), np.int32(Reduce_Type.Sum.value)) - all_in_arr = hpat.distributed_api.gatherv(in_arr) - all_on_arr = hpat.distributed_api.gatherv(on_arr) + all_in_arr = sdc.distributed_api.gatherv(in_arr) + all_on_arr = sdc.distributed_api.gatherv(on_arr) if rank == 0: start, end = _build_indexer(all_on_arr, all_N, win, False, True) 
all_out = roll_variable_apply_seq(all_in_arr, all_on_arr, win, start, end, kernel_func) else: all_out = np.empty(all_N, np.float64) - hpat.distributed_api.bcast(all_out) - start = hpat.distributed_api.get_start(all_N, n_pes, rank) - end = hpat.distributed_api.get_end(all_N, n_pes, rank) + sdc.distributed_api.bcast(all_out) + start = sdc.distributed_api.get_start(all_N, n_pes, rank) + end = sdc.distributed_api.get_end(all_N, n_pes, rank) return all_out[start:end] diff --git a/hpat/hiframes/series_kernels.py b/sdc/hiframes/series_kernels.py similarity index 71% rename from hpat/hiframes/series_kernels.py rename to sdc/hiframes/series_kernels.py index 99ca0bb82..d33a92b1d 100644 --- a/hpat/hiframes/series_kernels.py +++ b/sdc/hiframes/series_kernels.py @@ -34,29 +34,29 @@ from numba.extending import overload from numba.typing.templates import infer_global, AbstractTemplate, signature -import hpat -from hpat.str_ext import string_type, unicode_to_std_str, std_str_to_unicode -from hpat.str_arr_ext import (string_array_type, StringArrayType, +import sdc +from sdc.str_ext import string_type, unicode_to_std_str, std_str_to_unicode +from sdc.str_arr_ext import (string_array_type, StringArrayType, is_str_arr_typ, pre_alloc_string_array, get_utf8_size) # float columns can have regular np.nan def _column_filter_impl(B, ind): # pragma: no cover - dtype = hpat.hiframes.api.shift_dtype(B.dtype) + dtype = sdc.hiframes.api.shift_dtype(B.dtype) A = np.empty(len(B), dtype) for i in numba.parfor.internal_prange(len(A)): if ind[i]: A[i] = B[i] else: - hpat.hiframes.join.setitem_arr_nan(A, i) - return hpat.hiframes.api.init_series(A) + sdc.hiframes.join.setitem_arr_nan(A, i) + return sdc.hiframes.api.init_series(A) def _column_count_impl(A): # pragma: no cover numba.parfor.init_prange() count = 0 for i in numba.parfor.internal_prange(len(A)): - if not hpat.hiframes.api.isna(A, i): + if not sdc.hiframes.api.isna(A, i): count += 1 res = count @@ -66,7 +66,7 @@ def _column_count_impl(A): # 
pragma: no cover def _column_fillna_impl(A, B, fill): # pragma: no cover for i in numba.parfor.internal_prange(len(A)): s = B[i] - if hpat.hiframes.api.isna(B, i): + if sdc.hiframes.api.isna(B, i): s = fill A[i] = s @@ -77,18 +77,18 @@ def _series_fillna_str_alloc_impl(B, fill, name): # pragma: no cover # get total chars in new array for i in numba.parfor.internal_prange(n): s = B[i] - if hpat.hiframes.api.isna(B, i): + if sdc.hiframes.api.isna(B, i): num_chars += len(fill) else: num_chars += len(s) - A = hpat.str_arr_ext.pre_alloc_string_array(n, num_chars) - hpat.hiframes.api.fillna(A, B, fill) - return hpat.hiframes.api.init_series(A, None, name) + A = sdc.str_arr_ext.pre_alloc_string_array(n, num_chars) + sdc.hiframes.api.fillna(A, B, fill) + return sdc.hiframes.api.init_series(A, None, name) def _series_dropna_float_impl(S, name): # pragma: no cover old_len = len(S) - new_len = old_len - hpat.hiframes.api.init_series(S).isna().sum() + new_len = old_len - sdc.hiframes.api.init_series(S).isna().sum() A = np.empty(new_len, S.dtype) curr_ind = 0 for i in numba.parfor.internal_prange(old_len): @@ -97,7 +97,7 @@ def _series_dropna_float_impl(S, name): # pragma: no cover A[curr_ind] = val curr_ind += 1 - return hpat.hiframes.api.init_series(A, None, name) + return sdc.hiframes.api.init_series(A, None, name) # using njit since 1D_var is broken for alloc when there is calculation of len @@ -108,20 +108,20 @@ def _series_dropna_str_alloc_impl_inner(B): # pragma: no cover old_len = len(B) na_count = 0 for i in range(len(B)): - if hpat.str_arr_ext.str_arr_is_na(B, i): + if sdc.str_arr_ext.str_arr_is_na(B, i): na_count += 1 # TODO: more efficient null counting new_len = old_len - na_count - num_chars = hpat.str_arr_ext.num_total_chars(B) - A = hpat.str_arr_ext.pre_alloc_string_array(new_len, num_chars) - hpat.str_arr_ext.copy_non_null_offsets(A, B) - hpat.str_arr_ext.copy_data(A, B) + num_chars = sdc.str_arr_ext.num_total_chars(B) + A = 
sdc.str_arr_ext.pre_alloc_string_array(new_len, num_chars) + sdc.str_arr_ext.copy_non_null_offsets(A, B) + sdc.str_arr_ext.copy_data(A, B) return A def _series_dropna_str_alloc_impl(B, name): # pragma: no cover - A = hpat.hiframes.series_kernels._series_dropna_str_alloc_impl_inner(B) - return hpat.hiframes.api.init_series(A, None, name) + A = sdc.hiframes.series_kernels._series_dropna_str_alloc_impl_inner(B) + return sdc.hiframes.api.init_series(A, None, name) # return the nan value for the type (handle dt64) @@ -145,7 +145,7 @@ def _get_type_max_value(dtype): @overload(_get_type_max_value) def _get_type_max_value_overload(dtype): if isinstance(dtype.dtype, (types.NPDatetime, types.NPTimedelta)): - return lambda dtype: hpat.hiframes.pd_timestamp_ext.integer_to_dt64( + return lambda dtype: sdc.hiframes.pd_timestamp_ext.integer_to_dt64( numba.targets.builtins.get_type_max_value(numba.types.int64)) return lambda dtype: numba.targets.builtins.get_type_max_value(dtype) @@ -153,7 +153,7 @@ def _get_type_max_value_overload(dtype): @numba.njit def _sum_handle_nan(s, count): # pragma: no cover if not count: - s = hpat.hiframes.series_kernels._get_nan(s) + s = sdc.hiframes.series_kernels._get_nan(s) return s @@ -180,7 +180,7 @@ def _column_sum_impl_count(A): # pragma: no cover s += val count += 1 - res = hpat.hiframes.series_kernels._sum_handle_nan(s, count) + res = sdc.hiframes.series_kernels._sum_handle_nan(s, count) return res @@ -216,7 +216,7 @@ def _column_mean_impl(A): # pragma: no cover s += val count += 1 - res = hpat.hiframes.series_kernels._mean_handle_nan(s, count) + res = sdc.hiframes.series_kernels._mean_handle_nan(s, count) return res @@ -240,7 +240,7 @@ def _column_var_impl(A): # pragma: no cover count_m += 1 numba.parfor.init_prange() - m = hpat.hiframes.series_kernels._mean_handle_nan(m, count_m) + m = sdc.hiframes.series_kernels._mean_handle_nan(m, count_m) s = 0 count = 0 for i in numba.parfor.internal_prange(len(A)): @@ -249,25 +249,25 @@ def 
_column_var_impl(A): # pragma: no cover s += (val - m)**2 count += 1 - res = hpat.hiframes.series_kernels._var_handle_nan(s, count) + res = sdc.hiframes.series_kernels._var_handle_nan(s, count) return res def _column_std_impl(A): # pragma: no cover - var = hpat.hiframes.api.var(A) + var = sdc.hiframes.api.var(A) return var**0.5 def _column_min_impl(in_arr): # pragma: no cover numba.parfor.init_prange() count = 0 - s = hpat.hiframes.series_kernels._get_type_max_value(in_arr.dtype) + s = sdc.hiframes.series_kernels._get_type_max_value(in_arr.dtype) for i in numba.parfor.internal_prange(len(in_arr)): val = in_arr[i] - if not hpat.hiframes.api.isna(in_arr, i): + if not sdc.hiframes.api.isna(in_arr, i): s = min(s, val) count += 1 - res = hpat.hiframes.series_kernels._sum_handle_nan(s, count) + res = sdc.hiframes.series_kernels._sum_handle_nan(s, count) return res @@ -275,9 +275,9 @@ def _column_min_impl_no_isnan(in_arr): # pragma: no cover numba.parfor.init_prange() s = numba.targets.builtins.get_type_max_value(numba.types.int64) for i in numba.parfor.internal_prange(len(in_arr)): - val = hpat.hiframes.pd_timestamp_ext.dt64_to_integer(in_arr[i]) + val = sdc.hiframes.pd_timestamp_ext.dt64_to_integer(in_arr[i]) s = min(s, val) - return hpat.hiframes.pd_timestamp_ext.convert_datetime64_to_timestamp(s) + return sdc.hiframes.pd_timestamp_ext.convert_datetime64_to_timestamp(s) # TODO: fix for dt64 @@ -291,7 +291,7 @@ def _column_max_impl(in_arr): # pragma: no cover if not np.isnan(val): s = max(s, val) count += 1 - res = hpat.hiframes.series_kernels._sum_handle_nan(s, count) + res = sdc.hiframes.series_kernels._sum_handle_nan(s, count) return res @@ -300,30 +300,30 @@ def _column_max_impl_no_isnan(in_arr): # pragma: no cover s = numba.targets.builtins.get_type_min_value(numba.types.int64) for i in numba.parfor.internal_prange(len(in_arr)): val = in_arr[i] - s = max(s, hpat.hiframes.pd_timestamp_ext.dt64_to_integer(val)) - return 
hpat.hiframes.pd_timestamp_ext.convert_datetime64_to_timestamp(s) + s = max(s, sdc.hiframes.pd_timestamp_ext.dt64_to_integer(val)) + return sdc.hiframes.pd_timestamp_ext.convert_datetime64_to_timestamp(s) def _column_sub_impl_datetime_series_timestamp(in_arr, ts): # pragma: no cover numba.parfor.init_prange() n = len(in_arr) S = numba.unsafe.ndarray.empty_inferred((n,)) - tsint = hpat.hiframes.pd_timestamp_ext.convert_timestamp_to_datetime64(ts) + tsint = sdc.hiframes.pd_timestamp_ext.convert_timestamp_to_datetime64(ts) for i in numba.parfor.internal_prange(n): - S[i] = hpat.hiframes.pd_timestamp_ext.integer_to_timedelta64( - hpat.hiframes.pd_timestamp_ext.dt64_to_integer(in_arr[i]) - tsint) - return hpat.hiframes.api.init_series(S) + S[i] = sdc.hiframes.pd_timestamp_ext.integer_to_timedelta64( + sdc.hiframes.pd_timestamp_ext.dt64_to_integer(in_arr[i]) - tsint) + return sdc.hiframes.api.init_series(S) def _column_sub_impl_datetimeindex_timestamp(in_arr, ts): # pragma: no cover numba.parfor.init_prange() n = len(in_arr) S = numba.unsafe.ndarray.empty_inferred((n,)) - tsint = hpat.hiframes.pd_timestamp_ext.convert_timestamp_to_datetime64(ts) + tsint = sdc.hiframes.pd_timestamp_ext.convert_timestamp_to_datetime64(ts) for i in numba.parfor.internal_prange(n): - S[i] = hpat.hiframes.pd_timestamp_ext.integer_to_timedelta64( - hpat.hiframes.pd_timestamp_ext.dt64_to_integer(in_arr[i]) - tsint) - return hpat.hiframes.api.init_timedelta_index(S) + S[i] = sdc.hiframes.pd_timestamp_ext.integer_to_timedelta64( + sdc.hiframes.pd_timestamp_ext.dt64_to_integer(in_arr[i]) - tsint) + return sdc.hiframes.api.init_timedelta_index(S) def _column_describe_impl(S): # pragma: no cover @@ -352,17 +352,17 @@ def _column_describe_impl(S): # pragma: no cover def _column_fillna_alloc_impl(S, val, name): # pragma: no cover # TODO: handle string, etc. 
B = np.empty(len(S), S.dtype) - hpat.hiframes.api.fillna(B, S, val) - return hpat.hiframes.api.init_series(B, None, name) + sdc.hiframes.api.fillna(B, S, val) + return sdc.hiframes.api.init_series(B, None, name) def _str_contains_regex_impl(str_arr, pat): # pragma: no cover - e = hpat.str_ext.compile_regex(pat) - return hpat.hiframes.api.str_contains_regex(str_arr, e) + e = sdc.str_ext.compile_regex(pat) + return sdc.hiframes.api.str_contains_regex(str_arr, e) def _str_contains_noregex_impl(str_arr, pat): # pragma: no cover - return hpat.hiframes.api.str_contains_noregex(str_arr, pat) + return sdc.hiframes.api.str_contains_noregex(str_arr, pat) # TODO: use online algorithm, e.g. StatFunctions.scala @@ -390,17 +390,17 @@ def _column_corr_impl(S1, S2): # pragma: no cover def _series_append_single_impl(arr, other): - return hpat.hiframes.api.init_series( - hpat.hiframes.api.concat((arr, other))) + return sdc.hiframes.api.init_series( + sdc.hiframes.api.concat((arr, other))) def _series_append_tuple_impl(arr, other): - tup_other = hpat.hiframes.api.to_const_tuple(other) - tup_other = hpat.hiframes.api.series_tup_to_arr_tup(tup_other) + tup_other = sdc.hiframes.api.to_const_tuple(other) + tup_other = sdc.hiframes.api.series_tup_to_arr_tup(tup_other) arrs = (arr,) + tup_other - c_arrs = hpat.hiframes.api.to_const_tuple(arrs) - return hpat.hiframes.api.init_series( - hpat.hiframes.api.concat(c_arrs)) + c_arrs = sdc.hiframes.api.to_const_tuple(arrs) + return sdc.hiframes.api.init_series( + sdc.hiframes.api.concat(c_arrs)) def _series_isna_impl(arr): @@ -408,8 +408,8 @@ def _series_isna_impl(arr): n = len(arr) out_arr = np.empty(n, np.bool_) for i in numba.parfor.internal_prange(n): - out_arr[i] = hpat.hiframes.api.isna(arr, i) - return hpat.hiframes.api.init_series(out_arr) + out_arr[i] = sdc.hiframes.api.isna(arr, i) + return sdc.hiframes.api.init_series(out_arr) # def _series_astype_str_impl(arr): @@ -420,25 +420,25 @@ def _series_isna_impl(arr): # s = arr[i] # num_chars 
+= len(str(s)) # TODO: check NA # -# A = hpat.str_arr_ext.pre_alloc_string_array(n, num_chars) +# A = sdc.str_arr_ext.pre_alloc_string_array(n, num_chars) # for i in numba.parfor.internal_prange(n): # s = arr[i] # A[i] = str(s) # TODO: check NA -# return hpat.hiframes.api.init_series(A) +# return sdc.hiframes.api.init_series(A) # def _str_replace_regex_impl(str_arr, pat, val): # numba.parfor.init_prange() -# e = hpat.str_ext.compile_regex(unicode_to_std_str(pat)) +# e = sdc.str_ext.compile_regex(unicode_to_std_str(pat)) # val = unicode_to_std_str(val) # n = len(str_arr) # n_total_chars = 0 -# str_list = hpat.str_ext.alloc_str_list(n) +# str_list = sdc.str_ext.alloc_str_list(n) # for i in numba.parfor.internal_prange(n): # # TODO: support unicode # in_str = unicode_to_std_str(str_arr[i]) # out_str = std_str_to_unicode( -# hpat.str_ext.str_replace_regex(in_str, e, val)) +# sdc.str_ext.str_replace_regex(in_str, e, val)) # str_list[i] = out_str # n_total_chars += len(out_str) # numba.parfor.init_prange() @@ -446,7 +446,7 @@ def _series_isna_impl(arr): # for i in numba.parfor.internal_prange(n): # _str = str_list[i] # out_arr[i] = _str -# return hpat.hiframes.api.init_series(out_arr) +# return sdc.hiframes.api.init_series(out_arr) def _str_replace_regex_impl(str_arr, pat, val): @@ -454,7 +454,7 @@ def _str_replace_regex_impl(str_arr, pat, val): e = re.compile(pat) n = len(str_arr) n_total_chars = 0 - str_list = hpat.str_ext.alloc_str_list(n) + str_list = sdc.str_ext.alloc_str_list(n) for i in numba.parfor.internal_prange(n): out_str = e.sub(val, str_arr[i]) str_list[i] = out_str @@ -464,7 +464,7 @@ def _str_replace_regex_impl(str_arr, pat, val): for i in numba.parfor.internal_prange(n): _str = str_list[i] out_arr[i] = _str - return hpat.hiframes.api.init_series(out_arr) + return sdc.hiframes.api.init_series(out_arr) # TODO: refactor regex and noregex @@ -475,12 +475,12 @@ def _str_replace_regex_impl(str_arr, pat, val): # val = unicode_to_std_str(val) # n = len(str_arr) 
# n_total_chars = 0 -# str_list = hpat.str_ext.alloc_str_list(n) +# str_list = sdc.str_ext.alloc_str_list(n) # for i in numba.parfor.internal_prange(n): # # TODO: support unicode # in_str = unicode_to_std_str(str_arr[i]) # out_str = std_str_to_unicode( -# hpat.str_ext.str_replace_noregex(in_str, e, val)) +# sdc.str_ext.str_replace_noregex(in_str, e, val)) # str_list[i] = out_str # n_total_chars += len(out_str) # numba.parfor.init_prange() @@ -488,14 +488,14 @@ def _str_replace_regex_impl(str_arr, pat, val): # for i in numba.parfor.internal_prange(n): # _str = str_list[i] # out_arr[i] = _str -# return hpat.hiframes.api.init_series(out_arr) +# return sdc.hiframes.api.init_series(out_arr) def _str_replace_noregex_impl(str_arr, pat, val): numba.parfor.init_prange() n = len(str_arr) n_total_chars = 0 - str_list = hpat.str_ext.alloc_str_list(n) + str_list = sdc.str_ext.alloc_str_list(n) for i in numba.parfor.internal_prange(n): out_str = str_arr[i].replace(pat, val) str_list[i] = out_str @@ -505,7 +505,7 @@ def _str_replace_noregex_impl(str_arr, pat, val): for i in numba.parfor.internal_prange(n): _str = str_list[i] out_arr[i] = _str - return hpat.hiframes.api.init_series(out_arr) + return sdc.hiframes.api.init_series(out_arr) @numba.njit @@ -527,20 +527,20 @@ def gt_f(a, b): 'min': defaultdict(lambda: _column_min_impl, [(types.NPDatetime('ns'), _column_min_impl_no_isnan)]), 'var': _column_var_impl, 'std': _column_std_impl, - 'nunique': lambda A: hpat.hiframes.api.nunique(A), - 'unique': lambda A: hpat.hiframes.api.unique(A), + 'nunique': lambda A: sdc.hiframes.api.nunique(A), + 'unique': lambda A: sdc.hiframes.api.unique(A), 'describe': _column_describe_impl, 'fillna_alloc': _column_fillna_alloc_impl, 'fillna_str_alloc': _series_fillna_str_alloc_impl, 'dropna_float': _series_dropna_float_impl, 'dropna_str_alloc': _series_dropna_str_alloc_impl, - 'shift': lambda A, shift: hpat.hiframes.api.init_series(hpat.hiframes.rolling.shift(A, shift, False)), - 'shift_default': 
lambda A: hpat.hiframes.api.init_series(hpat.hiframes.rolling.shift(A, 1, False)), - 'pct_change': lambda A, shift: hpat.hiframes.api.init_series(hpat.hiframes.rolling.pct_change(A, shift, False)), - 'pct_change_default': lambda A: hpat.hiframes.api.init_series(hpat.hiframes.rolling.pct_change(A, 1, False)), + 'shift': lambda A, shift: sdc.hiframes.api.init_series(sdc.hiframes.rolling.shift(A, shift, False)), + 'shift_default': lambda A: sdc.hiframes.api.init_series(sdc.hiframes.rolling.shift(A, 1, False)), + 'pct_change': lambda A, shift: sdc.hiframes.api.init_series(sdc.hiframes.rolling.pct_change(A, shift, False)), + 'pct_change_default': lambda A: sdc.hiframes.api.init_series(sdc.hiframes.rolling.pct_change(A, 1, False)), 'str_contains_regex': _str_contains_regex_impl, 'str_contains_noregex': _str_contains_noregex_impl, - # 'abs': lambda A: hpat.hiframes.api.init_series(np.abs(A)), # TODO: timedelta + # 'abs': lambda A: sdc.hiframes.api.init_series(np.abs(A)), # TODO: timedelta 'cov': _column_cov_impl, 'corr': _column_corr_impl, 'append_single': _series_append_single_impl, @@ -549,13 +549,13 @@ def gt_f(a, b): # isnull is just alias of isna 'isnull': _series_isna_impl, # 'astype_str': _series_astype_str_impl, - 'nlargest': lambda A, k, name: hpat.hiframes.api.init_series(hpat.hiframes.api.nlargest(A, k, True, gt_f), None, name), - 'nlargest_default': lambda A, name: hpat.hiframes.api.init_series(hpat.hiframes.api.nlargest(A, 5, True, gt_f), None, name), - 'nsmallest': lambda A, k, name: hpat.hiframes.api.init_series(hpat.hiframes.api.nlargest(A, k, False, lt_f), None, name), - 'nsmallest_default': lambda A, name: hpat.hiframes.api.init_series(hpat.hiframes.api.nlargest(A, 5, False, lt_f), None, name), - 'head': lambda A, I, k, name: hpat.hiframes.api.init_series(A[:k], None, name), - 'head_index': lambda A, I, k, name: hpat.hiframes.api.init_series(A[:k], I[:k], name), - 'median': lambda A: hpat.hiframes.api.median(A), + 'nlargest': lambda A, k, name: 
sdc.hiframes.api.init_series(sdc.hiframes.api.nlargest(A, k, True, gt_f), None, name), + 'nlargest_default': lambda A, name: sdc.hiframes.api.init_series(sdc.hiframes.api.nlargest(A, 5, True, gt_f), None, name), + 'nsmallest': lambda A, k, name: sdc.hiframes.api.init_series(sdc.hiframes.api.nlargest(A, k, False, lt_f), None, name), + 'nsmallest_default': lambda A, name: sdc.hiframes.api.init_series(sdc.hiframes.api.nlargest(A, 5, False, lt_f), None, name), + 'head': lambda A, I, k, name: sdc.hiframes.api.init_series(A[:k], None, name), + 'head_index': lambda A, I, k, name: sdc.hiframes.api.init_series(A[:k], I[:k], name), + 'median': lambda A: sdc.hiframes.api.median(A), # TODO: handle NAs in argmin/argmax # 'idxmin': lambda A: A.argmin(), # 'idxmax': lambda A: A.argmax(), diff --git a/hpat/hiframes/sort.py b/sdc/hiframes/sort.py similarity index 92% rename from hpat/hiframes/sort.py rename to sdc/hiframes/sort.py index 5ed27441f..20dbf2894 100644 --- a/hpat/hiframes/sort.py +++ b/sdc/hiframes/sort.py @@ -35,25 +35,25 @@ mk_unique_var) from numba.typing import signature from numba.extending import overload -import hpat -import hpat.timsort -from hpat.timsort import getitem_arr_tup -from hpat.utils import _numba_to_c_type_map -from hpat import distributed, distributed_analysis -from hpat.distributed_api import Reduce_Type -from hpat.distributed_analysis import Distribution -from hpat.utils import (debug_prints, empty_like_type, get_ctypes_ptr, +import sdc +import sdc.timsort +from sdc.timsort import getitem_arr_tup +from sdc.utils import _numba_to_c_type_map +from sdc import distributed, distributed_analysis +from sdc.distributed_api import Reduce_Type +from sdc.distributed_analysis import Distribution +from sdc.utils import (debug_prints, empty_like_type, get_ctypes_ptr, gen_getitem) -from hpat.shuffle_utils import (alltoallv, alltoallv_tup, +from sdc.shuffle_utils import (alltoallv, alltoallv_tup, finalize_shuffle_meta, update_shuffle_meta, 
alloc_pre_shuffle_metadata, _get_keys_tup, _get_data_tup) -from hpat.str_arr_ext import (string_array_type, to_string_list, +from sdc.str_arr_ext import (string_array_type, to_string_list, cp_str_list_to_array, str_list_to_array, get_offset_ptr, get_data_ptr, convert_len_arr_to_offset, pre_alloc_string_array, num_total_chars) -from hpat.str_ext import string_type +from sdc.str_ext import string_type MIN_SAMPLES = 1000000 @@ -225,7 +225,7 @@ def visit_vars_sort(sort_node, callback, cbdata): def remove_dead_sort(sort_node, lives, arg_aliases, alias_map, func_ir, typemap): # TODO: remove this feature - if not hpat.hiframes.api.enable_hiframes_remove_dead: + if not sdc.hiframes.api.enable_hiframes_remove_dead: return sort_node # TODO: arg aliases for inplace case? @@ -338,18 +338,18 @@ def sort_distributed_run(sort_node, array_dists, typemap, calltypes, typingctx, func_text += " key_arrs = ({},)\n".format(key_name_args) # single value needs comma to become tuple func_text += " data = ({}{})\n".format(col_name_args, "," if len(in_vars) == 1 else "") - func_text += " hpat.hiframes.sort.local_sort(key_arrs, data, {})\n".format(sort_node.ascending) + func_text += " sdc.hiframes.sort.local_sort(key_arrs, data, {})\n".format(sort_node.ascending) func_text += " return key_arrs, data\n" loc_vars = {} - exec(func_text, {'hpat': hpat}, loc_vars) + exec(func_text, {'sdc': sdc}, loc_vars) sort_impl = loc_vars['f'] key_typ = types.Tuple([typemap[v.name] for v in key_arrs]) data_tup_typ = types.Tuple([typemap[v.name] for v in in_vars]) f_block = compile_to_numba_ir(sort_impl, - {'hpat': hpat, + {'sdc': sdc, 'to_string_list': to_string_list, 'cp_str_list_to_array': cp_str_list_to_array}, typingctx, @@ -383,11 +383,11 @@ def par_sort_impl(key_arrs, data, ascending): out_key, out_data = parallel_sort(key_arrs, data, ascending) # TODO: use k-way merge instead of sort # sort output - hpat.hiframes.sort.local_sort(out_key, out_data, ascending) + sdc.hiframes.sort.local_sort(out_key, 
out_data, ascending) return out_key, out_data f_block = compile_to_numba_ir(par_sort_impl, - {'hpat': hpat, + {'sdc': sdc, 'parallel_sort': parallel_sort, 'to_string_list': to_string_list, 'cp_str_list_to_array': cp_str_list_to_array}, @@ -433,7 +433,7 @@ def _impl(arr): def to_string_list_typ(typ): if typ == string_array_type: - return types.List(hpat.str_ext.string_type) + return types.List(sdc.str_ext.string_type) if isinstance(typ, (types.Tuple, types.UniTuple)): new_typs = [] @@ -451,9 +451,9 @@ def local_sort(key_arrs, data, ascending=True): l_key_arrs = to_string_list(key_arrs) l_data = to_string_list(data) n_out = len(key_arrs[0]) - hpat.timsort.sort(l_key_arrs, 0, n_out, l_data) + sdc.timsort.sort(l_key_arrs, 0, n_out, l_data) if not ascending: - hpat.timsort.reverseRange(l_key_arrs, 0, n_out, l_data) + sdc.timsort.reverseRange(l_key_arrs, 0, n_out, l_data) cp_str_list_to_array(key_arrs, l_key_arrs) cp_str_list_to_array(data, l_data) @@ -461,10 +461,10 @@ def local_sort(key_arrs, data, ascending=True): @numba.njit(no_cpython_wrapper=True, cache=True) def parallel_sort(key_arrs, data, ascending=True): n_local = len(key_arrs[0]) - n_total = hpat.distributed_api.dist_reduce(n_local, np.int32(Reduce_Type.Sum.value)) + n_total = sdc.distributed_api.dist_reduce(n_local, np.int32(Reduce_Type.Sum.value)) - n_pes = hpat.distributed_api.get_size() - my_rank = hpat.distributed_api.get_rank() + n_pes = sdc.distributed_api.get_size() + my_rank = sdc.distributed_api.get_rank() # similar to Spark's sample computation Partitioner.scala sampleSize = min(samplePointsPerPartitionHint * n_pes, MIN_SAMPLES) @@ -475,7 +475,7 @@ def parallel_sort(key_arrs, data, ascending=True): samples = key_arrs[0][inds] # print(sampleSize, fraction, n_local, n_loc_samples, len(samples)) - all_samples = hpat.distributed_api.gatherv(samples) + all_samples = sdc.distributed_api.gatherv(samples) all_samples = to_string_list(all_samples) bounds = empty_like_type(n_pes - 1, all_samples) @@ -490,8 
+490,8 @@ def parallel_sort(key_arrs, data, ascending=True): # print(bounds) bounds = str_list_to_array(bounds) - bounds = hpat.distributed_api.prealloc_str_for_bcast(bounds) - hpat.distributed_api.bcast(bounds) + bounds = sdc.distributed_api.prealloc_str_for_bcast(bounds) + sdc.distributed_api.bcast(bounds) # calc send/recv counts pre_shuffle_meta = alloc_pre_shuffle_metadata(key_arrs, data, n_pes, True) diff --git a/hpat/hiframes/split_impl.py b/sdc/hiframes/split_impl.py similarity index 99% rename from hpat/hiframes/split_impl.py rename to sdc/hiframes/split_impl.py index dad6c8019..979f0d07c 100644 --- a/hpat/hiframes/split_impl.py +++ b/sdc/hiframes/split_impl.py @@ -28,7 +28,7 @@ import operator import numpy as np import numba -import hpat +import sdc from numba import types from numba.typing.templates import (infer_global, AbstractTemplate, infer, signature, AttributeTemplate, infer_getattr, bound_function) @@ -37,10 +37,10 @@ make_attribute_wrapper, lower_builtin, box, unbox, lower_getattr, intrinsic, overload_method, overload, overload_attribute) from numba import cgutils -from hpat.str_ext import string_type +from sdc.str_ext import string_type from numba.targets.imputils import (impl_ret_new_ref, impl_ret_borrowed, iternext_impl, RefType) -from hpat.str_arr_ext import (string_array_type, get_data_ptr, +from sdc.str_arr_ext import (string_array_type, get_data_ptr, is_str_arr_typ, pre_alloc_string_array, _memcpy) import llvmlite.llvmpy.core as lc @@ -434,7 +434,7 @@ def _impl(A, ind): end_index = getitem_c_arr(A._index_offsets, ind + 1) n = end_index - start_index - 1 - str_list = hpat.str_ext.alloc_str_list(n) + str_list = sdc.str_ext.alloc_str_list(n) for i in range(n): data_start = getitem_c_arr( A._data_offsets, start_index + i) diff --git a/hpat/io/__init__.py b/sdc/io/__init__.py similarity index 100% rename from hpat/io/__init__.py rename to sdc/io/__init__.py diff --git a/hpat/io/_csv.cpp b/sdc/io/_csv.cpp similarity index 98% rename from 
hpat/io/_csv.cpp rename to sdc/io/_csv.cpp index 8788c4ddc..28cd07289 100644 --- a/hpat/io/_csv.cpp +++ b/sdc/io/_csv.cpp @@ -252,7 +252,7 @@ static PyMethodDef stream_reader_methods[] = { // the actual Python type class static PyTypeObject stream_reader_type = { - PyObject_HEAD_INIT(NULL) "hpat.hio.StreamReader", /*tp_name*/ + PyObject_HEAD_INIT(NULL) "sdc.hio.StreamReader", /*tp_name*/ sizeof(stream_reader), /*tp_basicsize*/ 0, /*tp_itemsize*/ (destructor)stream_reader_dealloc, /*tp_dealloc*/ @@ -330,11 +330,11 @@ static PyObject* csv_chunk_reader(std::istream* f, size_t fsz, bool is_parallel, std::string transport_func_name; if (is_parallel) { - transport_func_name = "hpat.transport_mpi"; + transport_func_name = "sdc.transport_mpi"; } else { - transport_func_name = "hpat.transport_seq"; + transport_func_name = "sdc.transport_seq"; } hpat_mpi_csv_get_offsets hpat_mpi_csv_get_offsets_ptr = diff --git a/hpat/io/_csv.h b/sdc/io/_csv.h similarity index 100% rename from hpat/io/_csv.h rename to sdc/io/_csv.h diff --git a/hpat/io/_hdf5.cpp b/sdc/io/_hdf5.cpp similarity index 100% rename from hpat/io/_hdf5.cpp rename to sdc/io/_hdf5.cpp diff --git a/hpat/io/_io.cpp b/sdc/io/_io.cpp similarity index 100% rename from hpat/io/_io.cpp rename to sdc/io/_io.cpp diff --git a/hpat/io/_parquet.cpp b/sdc/io/_parquet.cpp similarity index 100% rename from hpat/io/_parquet.cpp rename to sdc/io/_parquet.cpp diff --git a/hpat/io/_xe_wrapper.cpp b/sdc/io/_xe_wrapper.cpp similarity index 100% rename from hpat/io/_xe_wrapper.cpp rename to sdc/io/_xe_wrapper.cpp diff --git a/hpat/io/csv_ext.py b/sdc/io/csv_ext.py similarity index 94% rename from hpat/io/csv_ext.py rename to sdc/io/csv_ext.py index 8f9e454c2..fcce2d26d 100644 --- a/hpat/io/csv_ext.py +++ b/sdc/io/csv_ext.py @@ -35,23 +35,23 @@ from numba.extending import overload, intrinsic, register_model, models, box from numba.ir_utils import (visit_vars_inner, replace_vars_inner, compile_to_numba_ir, replace_arg_nodes) -import hpat 
-from hpat import distributed, distributed_analysis -from hpat.utils import debug_prints, alloc_arr_tup, empty_like_type -from hpat.distributed_analysis import Distribution -from hpat.str_ext import string_type -from hpat.str_arr_ext import (string_array_type, to_string_list, +import sdc +from sdc import distributed, distributed_analysis +from sdc.utils import debug_prints, alloc_arr_tup, empty_like_type +from sdc.distributed_analysis import Distribution +from sdc.str_ext import string_type +from sdc.str_arr_ext import (string_array_type, to_string_list, cp_str_list_to_array, str_list_to_array, get_offset_ptr, get_data_ptr, convert_len_arr_to_offset, pre_alloc_string_array, num_total_chars, getitem_str_offset, copy_str_arr_slice) -from hpat.timsort import copyElement_tup, getitem_arr_tup -from hpat.utils import _numba_to_c_type_map -from hpat import objmode +from sdc.timsort import copyElement_tup, getitem_arr_tup +from sdc.utils import _numba_to_c_type_map +from sdc import objmode import pandas as pd import numpy as np -from hpat.hiframes.pd_categorical_ext import (PDCategoricalDtype, CategoricalArray) +from sdc.hiframes.pd_categorical_ext import (PDCategoricalDtype, CategoricalArray) class CsvReader(ir.Stmt): @@ -229,7 +229,7 @@ def build_csv_definitions(csv_node, definitions=None): def csv_distributed_run(csv_node, array_dists, typemap, calltypes, typingctx, targetctx, dist_pass): parallel = True - if hpat.config.config_transport_mpi: + if sdc.config.config_transport_mpi: for v in csv_node.out_vars: if (array_dists[v.name] != distributed.Distribution.OneD and array_dists[v.name] != distributed.Distribution.OneD_Var): @@ -266,10 +266,10 @@ def csv_distributed_run(csv_node, array_dists, typemap, calltypes, typingctx, ta # TODO: get global size from C for arr in csv_node.out_vars: def f(A): - return hpat.distributed_api.dist_reduce(len(A), np.int32(_op)) + return sdc.distributed_api.dist_reduce(len(A), np.int32(_op)) f_block = compile_to_numba_ir( - f, {'hpat': 
hpat, 'np': np, - '_op': hpat.distributed_api.Reduce_Type.Sum.value}, + f, {'sdc': sdc, 'np': np, + '_op': sdc.distributed_api.Reduce_Type.Sum.value}, typingctx, (typemap[arr.name],), typemap, calltypes).blocks.popitem()[1] replace_arg_nodes(f_block, [arr]) nodes += f_block.body[:-2] @@ -277,7 +277,7 @@ def f(A): dist_pass._array_sizes[arr.name] = [size_var] out, start_var, end_var = dist_pass._gen_1D_div( size_var, arr.scope, csv_node.loc, "$alloc", "get_node_portion", - hpat.distributed_api.get_node_portion) + sdc.distributed_api.get_node_portion) dist_pass._array_starts[arr.name] = [start_var] dist_pass._array_counts[arr.name] = [end_var] nodes += out diff --git a/hpat/io/np_io.py b/sdc/io/np_io.py similarity index 96% rename from hpat/io/np_io.py rename to sdc/io/np_io.py index bf042bc0e..d0cd008c5 100644 --- a/hpat/io/np_io.py +++ b/sdc/io/np_io.py @@ -27,11 +27,11 @@ import numpy as np import numba -import hpat +import sdc from numba import types, cgutils from numba.targets.arrayobj import make_array from numba.extending import overload, intrinsic, overload_method -from hpat.str_ext import string_type +from sdc.str_ext import string_type from numba.ir_utils import (compile_to_numba_ir, replace_arg_nodes, find_callname, guard) @@ -86,7 +86,7 @@ def _handle_np_fromfile(assign, lhs, rhs): # FIXME: import here since hio has hdf5 which might not be available from .. import hio - if hpat.config.config_transport_mpi: + if sdc.config.config_transport_mpi: from .. import transport_mpi as transport else: from .. import transport_seq as transport @@ -130,7 +130,7 @@ def codegen(context, builder, sig, args): def tofile_overload(arr_ty, fname_ty): # FIXME: import here since hio has hdf5 which might not be available from .. import hio - if hpat.config.config_transport_mpi: + if sdc.config.config_transport_mpi: from .. import transport_mpi as transport else: from .. 
import transport_seq as transport @@ -169,8 +169,8 @@ def file_write_parallel_overload(fname, arr, start, count): def _impl(fname, arr, start, count): A = np.ascontiguousarray(arr) dtype_size = get_dtype_size(A.dtype) - elem_size = dtype_size * hpat.distributed_lower.get_tuple_prod(A.shape[1:]) - # hpat.cprint(start, count, elem_size) + elem_size = dtype_size * sdc.distributed_lower.get_tuple_prod(A.shape[1:]) + # sdc.cprint(start, count, elem_size) _file_write_parallel(fname._data, A.ctypes, start, count, elem_size) return _impl diff --git a/hpat/io/parquet_pio.py b/sdc/io/parquet_pio.py similarity index 97% rename from hpat/io/parquet_pio.py rename to sdc/io/parquet_pio.py index 1198cc76d..7505d90ac 100644 --- a/hpat/io/parquet_pio.py +++ b/sdc/io/parquet_pio.py @@ -25,7 +25,7 @@ # ***************************************************************************** -from hpat.config import _has_pyarrow +from sdc.config import _has_pyarrow import llvmlite.binding as ll from llvmlite import ir as lir from numba.targets.arrayobj import make_array @@ -44,11 +44,11 @@ from numba.typing import signature from numba.targets.imputils import impl_ret_new_ref, impl_ret_borrowed import numpy as np -import hpat -from hpat.str_ext import string_type, unicode_to_char_ptr -from hpat.str_arr_ext import StringArray, StringArrayPayloadType, construct_string_array -from hpat.str_arr_ext import string_array_type -from hpat.utils import unliteral_all +import sdc +from sdc.str_ext import string_type, unicode_to_char_ptr +from sdc.str_arr_ext import StringArray, StringArrayPayloadType, construct_string_array +from sdc.str_arr_ext import string_array_type +from sdc.utils import unliteral_all # from parquet/types.h @@ -192,7 +192,7 @@ def get_column_read_nodes(c_type, cvar, arrow_readers_var, i): if el_type == repr(types.NPDatetime('ns')): func_text += ' column_tmp = np.empty(col_size, dtype=np.int64)\n' # TODO: fix alloc - func_text += ' column = 
hpat.hiframes.api.ts_series_to_arr_typ(column_tmp)\n' + func_text += ' column = sdc.hiframes.api.ts_series_to_arr_typ(column_tmp)\n' else: func_text += ' column = np.empty(col_size, dtype=np.{})\n'.format( el_type) @@ -200,14 +200,14 @@ def get_column_read_nodes(c_type, cvar, arrow_readers_var, i): i, _type_to_pq_dtype_number[el_type]) loc_vars = {} - exec(func_text, {'hpat': hpat, 'np': np}, loc_vars) + exec(func_text, {'sdc': sdc, 'np': np}, loc_vars) size_func = loc_vars['f'] _, f_block = compile_to_numba_ir(size_func, {'get_column_size_parquet': get_column_size_parquet, 'read_parquet': read_parquet, 'read_parquet_str': read_parquet_str, 'np': np, - 'hpat': hpat, + 'sdc': sdc, 'StringArray': StringArray}).blocks.popitem() replace_arg_nodes(f_block, [arrow_readers_var]) diff --git a/hpat/io/pio.py b/sdc/io/pio.py similarity index 94% rename from hpat/io/pio.py rename to sdc/io/pio.py index 239d6a2c6..94c68a919 100644 --- a/hpat/io/pio.py +++ b/sdc/io/pio.py @@ -38,11 +38,11 @@ import numpy as np -import hpat -from hpat import utils -import hpat.io -from hpat.io import pio_api, pio_lower -from hpat.utils import find_str_const, debug_prints +import sdc +from sdc import utils +import sdc.io +from sdc.io import pio_api, pio_lower +from sdc.utils import find_str_const, debug_prints def remove_h5(rhs, lives, call_list): @@ -71,11 +71,11 @@ def handle_possible_h5_read(self, assign, lhs, rhs): dtype_str = str(tp.dtype) func_text = "def _h5_read_impl(dset, index):\n" # TODO: index arg? 
- func_text += " arr = hpat.io.pio_api.h5_read_dummy(dset, {}, '{}', index)\n".format(tp.ndim, dtype_str) + func_text += " arr = sdc.io.pio_api.h5_read_dummy(dset, {}, '{}', index)\n".format(tp.ndim, dtype_str) loc_vars = {} - exec(func_text, {'hpat': hpat}, loc_vars) + exec(func_text, {'sdc': sdc}, loc_vars) _h5_read_impl = loc_vars['_h5_read_impl'] - f_block = compile_to_numba_ir(_h5_read_impl, {'hpat': hpat}).blocks.popitem()[1] + f_block = compile_to_numba_ir(_h5_read_impl, {'sdc': sdc}).blocks.popitem()[1] index_var = rhs.index if rhs.op == 'getitem' else rhs.index_var replace_arg_nodes(f_block, [rhs.value, index_var]) nodes = f_block.body[:-3] # remove none return diff --git a/hpat/io/pio_api.py b/sdc/io/pio_api.py similarity index 91% rename from hpat/io/pio_api.py rename to sdc/io/pio_api.py index 8f6e2873d..2b37810bd 100644 --- a/hpat/io/pio_api.py +++ b/sdc/io/pio_api.py @@ -35,14 +35,14 @@ from numba.typing.templates import infer_global, AbstractTemplate, AttributeTemplate, bound_function from numba.extending import register_model, models, infer_getattr, infer, intrinsic -import hpat -import hpat.io -from hpat.str_ext import string_type -from hpat.utils import unliteral_all +import sdc +import sdc.io +from sdc.str_ext import string_type +from sdc.utils import unliteral_all -if hpat.config._has_h5py: +if sdc.config._has_h5py: import h5py - from hpat.io import _hdf5 + from sdc.io import _hdf5 import llvmlite.binding as ll ll.add_symbol('hpat_h5_read_filter', _hdf5.hpat_h5_read_filter) @@ -82,7 +82,7 @@ def __init__(self): h5file_data_type = types.int64 -if hpat.config._has_h5py: +if sdc.config._has_h5py: # hid_t is 32bit in 1.8 but 64bit in 1.10 if h5py.version.hdf5_version_tuple[1] == 8: h5file_data_type = types.int32 @@ -234,7 +234,7 @@ def generic(self, args, kws): return signature(ret_typ, *args) -if hpat.config._has_h5py: +if sdc.config._has_h5py: @infer_global(h5py.File) class H5File(AbstractTemplate): def generic(self, args, kws): @@ -306,38 +306,38 
@@ def generic(self, args, kws): return signature(string_type, *args) -sum_op = hpat.distributed_api.Reduce_Type.Sum.value +sum_op = sdc.distributed_api.Reduce_Type.Sum.value @numba.njit def get_filter_read_indices(bool_arr): indices = bool_arr.nonzero()[0] - rank = hpat.distributed_api.get_rank() - n_pes = hpat.distributed_api.get_size() + rank = sdc.distributed_api.get_rank() + n_pes = sdc.distributed_api.get_size() # get number of elements before this processor to align the indices # assuming bool_arr can be 1D_Var all_starts = np.empty(n_pes, np.int64) n_bool = len(bool_arr) - hpat.distributed_api.allgather(all_starts, n_bool) + sdc.distributed_api.allgather(all_starts, n_bool) ind_start = all_starts.cumsum()[rank] - n_bool - #n_arr = hpat.distributed_api.dist_reduce(len(bool_arr), np.int32(sum_op)) - #ind_start = hpat.distributed_api.get_start(n_arr, n_pes, rank) + #n_arr = sdc.distributed_api.dist_reduce(len(bool_arr), np.int32(sum_op)) + #ind_start = sdc.distributed_api.get_start(n_arr, n_pes, rank) indices += ind_start # TODO: use prefix-sum and all-to-all # all_indices = np.empty(n, indices.dtype) # allgatherv(all_indices, indices) - n = hpat.distributed_api.dist_reduce(len(indices), np.int32(sum_op)) - inds = hpat.distributed_api.gatherv(indices) + n = sdc.distributed_api.dist_reduce(len(indices), np.int32(sum_op)) + inds = sdc.distributed_api.gatherv(indices) if rank == 0: all_indices = inds else: all_indices = np.empty(n, indices.dtype) - hpat.distributed_api.bcast(all_indices) + sdc.distributed_api.bcast(all_indices) - start = hpat.distributed_api.get_start(n, n_pes, rank) - end = hpat.distributed_api.get_end(n, n_pes, rank) + start = sdc.distributed_api.get_start(n, n_pes, rank) + end = sdc.distributed_api.get_end(n, n_pes, rank) return all_indices[start:end] @@ -368,6 +368,6 @@ def codegen(context, builder, sig, args): def h5read_filter(dset_id, ndim, starts, counts, is_parallel, out_arr, read_indices): starts_ptr = tuple_to_ptr(starts) counts_ptr = 
tuple_to_ptr(counts) - type_enum = hpat.distributed_api.get_type_enum(out_arr) + type_enum = sdc.distributed_api.get_type_enum(out_arr) return _h5read_filter(dset_id, ndim, starts_ptr, counts_ptr, is_parallel, out_arr.ctypes, type_enum, read_indices.ctypes, len(read_indices)) diff --git a/hpat/io/pio_lower.py b/sdc/io/pio_lower.py similarity index 96% rename from hpat/io/pio_lower.py rename to sdc/io/pio_lower.py index 1520aa99f..3c5075b9a 100644 --- a/hpat/io/pio_lower.py +++ b/sdc/io/pio_lower.py @@ -29,18 +29,18 @@ from numba import types, cgutils from numba.targets.imputils import lower_builtin from numba.targets.arrayobj import make_array -import hpat.io -from hpat.io import pio_api -from hpat.utils import _numba_to_c_type_map -from hpat.io.pio_api import (h5file_type, h5dataset_or_group_type, h5dataset_type, h5group_type) -from hpat.str_ext import string_type, gen_get_unicode_chars, gen_std_str_to_unicode +import sdc.io +from sdc.io import pio_api +from sdc.utils import _numba_to_c_type_map +from sdc.io.pio_api import (h5file_type, h5dataset_or_group_type, h5dataset_type, h5group_type) +from sdc.str_ext import string_type, gen_get_unicode_chars, gen_std_str_to_unicode from llvmlite import ir as lir import llvmlite.binding as ll -import hpat.io -if hpat.config._has_h5py: +import sdc.io +if sdc.config._has_h5py: import h5py - from hpat.io import _hdf5 + from sdc.io import _hdf5 ll.add_symbol('hpat_h5_open', _hdf5.hpat_h5_open) ll.add_symbol('hpat_h5_open_dset_or_group_obj', _hdf5.hpat_h5_open_dset_or_group_obj) ll.add_symbol('hpat_h5_size', _hdf5.hpat_h5_size) @@ -56,7 +56,7 @@ h5file_lir_type = lir.IntType(64) -if hpat.config._has_h5py: +if sdc.config._has_h5py: # hid_t is 32bit in 1.8 but 64bit in 1.10 if h5py.version.hdf5_version_tuple[1] == 8: h5file_lir_type = lir.IntType(32) @@ -77,7 +77,7 @@ def h5_open_dset_lower(context, builder, sig, args): return builder.call(fn, [fg_id, dset_name]) -if hpat.config._has_h5py: +if sdc.config._has_h5py: 
@lower_builtin(h5py.File, string_type, string_type) @lower_builtin(h5py.File, string_type, string_type, types.int64) def h5_open(context, builder, sig, args): diff --git a/hpat/io/xenon_ext.py b/sdc/io/xenon_ext.py similarity index 98% rename from hpat/io/xenon_ext.py rename to sdc/io/xenon_ext.py index 92f2e2a11..b1499a575 100644 --- a/hpat/io/xenon_ext.py +++ b/sdc/io/xenon_ext.py @@ -44,10 +44,10 @@ from numba.targets.imputils import impl_ret_new_ref, lower_builtin from numba.targets.arrayobj import make_array -import hpat -from hpat.utils import get_constant, NOT_CONSTANT -from hpat.str_ext import string_type, unicode_to_char_ptr -from hpat.str_arr_ext import StringArray, StringArrayPayloadType, construct_string_array, string_array_type +import sdc +from sdc.utils import get_constant, NOT_CONSTANT +from sdc.str_ext import string_type, unicode_to_char_ptr +from sdc.str_arr_ext import StringArray, StringArrayPayloadType, construct_string_array, string_array_type def remove_xenon(rhs, lives, call_list): @@ -71,7 +71,7 @@ def read_xenon(): def _handle_read(assign, lhs, rhs, func_ir): - if not hpat.config._has_xenon: + if not sdc.config._has_xenon: raise ValueError("Xenon support not available") # TODO: init only once diff --git a/hpat/ml/__init__.py b/sdc/ml/__init__.py similarity index 97% rename from hpat/ml/__init__.py rename to sdc/ml/__init__.py index a567400aa..84575b8cd 100644 --- a/hpat/ml/__init__.py +++ b/sdc/ml/__init__.py @@ -27,4 +27,4 @@ from .svc import SVC from .naive_bayes import MultinomialNB -from hpat.utils import debug_prints +from sdc.utils import debug_prints diff --git a/hpat/ml/naive_bayes.py b/sdc/ml/naive_bayes.py similarity index 99% rename from hpat/ml/naive_bayes.py rename to sdc/ml/naive_bayes.py index da68f4171..f18c43dfc 100644 --- a/hpat/ml/naive_bayes.py +++ b/sdc/ml/naive_bayes.py @@ -28,8 +28,8 @@ from llvmlite import ir as lir import llvmlite.binding as ll import numba -import hpat -from hpat.utils import debug_prints +import 
sdc +from sdc.utils import debug_prints from numba import types from numba.typing.templates import (infer_global, AbstractTemplate, infer, signature, AttributeTemplate, infer_getattr, bound_function) diff --git a/hpat/ml/svc.py b/sdc/ml/svc.py similarity index 99% rename from hpat/ml/svc.py rename to sdc/ml/svc.py index c38c843f2..470495e72 100644 --- a/hpat/ml/svc.py +++ b/sdc/ml/svc.py @@ -28,8 +28,8 @@ from llvmlite import ir as lir import llvmlite.binding as ll import numba -import hpat -from hpat.utils import debug_prints +import sdc +from sdc.utils import debug_prints from numba import types from numba.typing.templates import (infer_global, AbstractTemplate, infer, signature, AttributeTemplate, infer_getattr, bound_function) diff --git a/hpat/ros.py b/sdc/ros.py similarity index 91% rename from hpat/ros.py rename to sdc/ros.py index 0ba2acd0a..5339d5a6c 100644 --- a/hpat/ros.py +++ b/sdc/ros.py @@ -35,22 +35,22 @@ from numba.typing import signature from numba.typing.templates import infer_global, AbstractTemplate from numba.extending import models, register_model, lower_builtin -import hpat +import sdc import numpy as np def read_ros_images(f_name): # implementation to enable regular python def f(file_name): # pragma: no cover - bag = hpat.ros.open_bag(file_name) - num_msgs = hpat.ros.get_msg_count(bag) - m, n = hpat.ros.get_image_dims(bag) - # hpat.cprint(num_msgs, m, n) + bag = sdc.ros.open_bag(file_name) + num_msgs = sdc.ros.get_msg_count(bag) + m, n = sdc.ros.get_image_dims(bag) + # sdc.cprint(num_msgs, m, n) A = np.empty((num_msgs, m, n, 3), dtype=np.uint8) - s = hpat.ros.read_ros_images_inner(A, bag) + s = sdc.ros.read_ros_images_inner(A, bag) return A - return hpat.jit(f)(f_name) + return sdc.jit(f)(f_name) # inner functions @@ -79,15 +79,15 @@ def _handle_read_images(lhs, rhs): fname = rhs.args[0] def f(file_name): # pragma: no cover - bag = hpat.ros.open_bag(file_name) - _num_msgs = hpat.ros.get_msg_count(bag) - _ros_m, _ros_n = 
hpat.ros.get_image_dims(bag) - # hpat.cprint(num_msgs, m, n) + bag = sdc.ros.open_bag(file_name) + _num_msgs = sdc.ros.get_msg_count(bag) + _ros_m, _ros_n = sdc.ros.get_image_dims(bag) + # sdc.cprint(num_msgs, m, n) _in_ros_arr = np.empty((_num_msgs, _ros_m, _ros_n, 3), dtype=np.uint8) - _ret = hpat.ros.read_ros_images_inner(_in_ros_arr, bag) + _ret = sdc.ros.read_ros_images_inner(_in_ros_arr, bag) f_block = compile_to_numba_ir( - f, {'np': np, 'hpat': hpat}).blocks.popitem()[1] + f, {'np': np, 'sdc': sdc}).blocks.popitem()[1] replace_arg_nodes(f_block, [fname]) nodes = f_block.body[:-3] # remove none return A_var = nodes[-2].value.args[0] @@ -161,7 +161,7 @@ def generic(self, args, kws): ll.add_symbol('read_images_parallel', ros_cpp.read_images_parallel) -@lower_builtin(open_bag, hpat.string_type) +@lower_builtin(open_bag, sdc.string_type) def lower_open_bag(context, builder, sig, args): fnty = lir.FunctionType(lir.IntType(8).as_pointer(), [lir.IntType(8).as_pointer()]) diff --git a/hpat/runtests.py b/sdc/runtests.py similarity index 86% rename from hpat/runtests.py rename to sdc/runtests.py index 9e8a318eb..a8ca6e882 100644 --- a/hpat/runtests.py +++ b/sdc/runtests.py @@ -27,8 +27,8 @@ import os import unittest -import hpat.tests -from hpat.tests.test_basic import get_rank +import sdc.tests +from sdc.tests.test_basic import get_rank """ Every test in suite can be executed specified times using @@ -38,15 +38,15 @@ loadTestsFromModule returns TestSuite obj with _tests member which contains further TestSuite instanses for each found testCase: - hpat_tests = TestSuite(hpat.tests) - TestSuite(hpat.tests)._tests = [TestSuite(hpat.tests.TestBasic), TestSuite(hpat.tests.TestDataFrame), ...] - TestSuite(hpat.tests.TestBasic)._tests = [TestBasic testMethod=test_array_reduce, ...] + hpat_tests = TestSuite(sdc.tests) + TestSuite(sdc.tests)._tests = [TestSuite(sdc.tests.TestBasic), TestSuite(sdc.tests.TestDataFrame), ...] 
+ TestSuite(sdc.tests.TestBasic)._tests = [TestBasic testMethod=test_array_reduce, ...] """ def load_tests(loader, tests, pattern): suite = unittest.TestSuite() - hpat_tests = loader.loadTestsFromModule(hpat.tests) + hpat_tests = loader.loadTestsFromModule(sdc.tests) repeat_test_number = int(os.getenv('SDC_REPEAT_TEST_NUMBER', '1')) if repeat_test_number > 1: @@ -62,7 +62,7 @@ def load_tests(loader, tests, pattern): if __name__ == '__main__': - if hpat.config.config_pipeline_hpat_default: + if sdc.config.config_pipeline_hpat_default: # initialize MPI get_rank() diff --git a/hpat/set_ext.py b/sdc/set_ext.py similarity index 95% rename from hpat/set_ext.py rename to sdc/set_ext.py index 751868b27..bca3f174b 100644 --- a/hpat/set_ext.py +++ b/sdc/set_ext.py @@ -25,12 +25,12 @@ # ***************************************************************************** -from hpat.str_arr_ext import (StringArray, StringArrayType, string_array_type, +from sdc.str_arr_ext import (StringArray, StringArrayType, string_array_type, pre_alloc_string_array, StringArrayPayloadType, is_str_arr_typ) -from hpat.str_ext import string_type, gen_get_unicode_chars -from hpat.utils import to_array -import hpat +from sdc.str_ext import string_type, gen_get_unicode_chars +from sdc.utils import to_array +import sdc import operator import numba from numba import types, typing, generated_jit @@ -119,7 +119,7 @@ def build_set(A): def _build_str_set_impl(A): - str_arr = hpat.hiframes.api.dummy_unbox_series(A) + str_arr = sdc.hiframes.api.dummy_unbox_series(A) str_set = init_set_string() n = len(str_arr) for i in range(n): @@ -281,11 +281,11 @@ def iternext_setiter(context, builder, sig, args, result): kind = numba.unicode.PY_UNICODE_1BYTE_KIND def std_str_to_unicode(std_str): - length = hpat.str_ext.get_std_str_len(std_str) + length = sdc.str_ext.get_std_str_len(std_str) ret = numba.unicode._empty_string(kind, length) - hpat.str_arr_ext._memcpy( - ret._data, hpat.str_ext.get_c_str(std_str), length, 1) - 
hpat.str_ext.del_str(std_str) + sdc.str_arr_ext._memcpy( + ret._data, sdc.str_ext.get_c_str(std_str), length, 1) + sdc.str_ext.del_str(std_str) return ret with builder.if_then(is_valid): @@ -293,6 +293,6 @@ def std_str_to_unicode(std_str): val = context.compile_internal( builder, std_str_to_unicode, - string_type(hpat.str_ext.std_str_type), + string_type(sdc.str_ext.std_str_type), [val]) result.yield_(val) diff --git a/hpat/shuffle_utils.py b/sdc/shuffle_utils.py similarity index 93% rename from hpat/shuffle_utils.py rename to sdc/shuffle_utils.py index c07dd37b5..c80597480 100644 --- a/hpat/shuffle_utils.py +++ b/sdc/shuffle_utils.py @@ -31,11 +31,11 @@ from numba import types from numba.extending import overload -import hpat -from hpat.utils import get_ctypes_ptr, _numba_to_c_type_map -from hpat.timsort import getitem_arr_tup -from hpat.str_ext import string_type -from hpat.str_arr_ext import (string_array_type, to_string_list, +import sdc +from sdc.utils import get_ctypes_ptr, _numba_to_c_type_map +from sdc.timsort import getitem_arr_tup +from sdc.str_ext import string_type +from sdc.str_arr_ext import (string_array_type, to_string_list, get_offset_ptr, get_data_ptr, convert_len_arr_to_offset, pre_alloc_string_array, num_total_chars) @@ -155,11 +155,11 @@ def finalize_shuffle_meta_overload(key_arrs, data, pre_shuffle_meta, n_pes, is_c func_text += " send_counts = pre_shuffle_meta.send_counts\n" func_text += " recv_counts = np.empty(n_pes, np.int32)\n" func_text += " tmp_offset = np.zeros(n_pes, np.int32)\n" # for non-contig - func_text += " hpat.distributed_api.alltoall(send_counts, recv_counts, 1)\n" + func_text += " sdc.distributed_api.alltoall(send_counts, recv_counts, 1)\n" func_text += " n_out = recv_counts.sum()\n" func_text += " n_send = send_counts.sum()\n" - func_text += " send_disp = hpat.hiframes.join.calc_disp(send_counts)\n" - func_text += " recv_disp = hpat.hiframes.join.calc_disp(recv_counts)\n" + func_text += " send_disp = 
sdc.hiframes.join.calc_disp(send_counts)\n" + func_text += " recv_disp = sdc.hiframes.join.calc_disp(recv_counts)\n" n_keys = len(key_arrs.types) n_all = len(key_arrs.types + data.types) @@ -185,15 +185,15 @@ def finalize_shuffle_meta_overload(key_arrs, data, pre_shuffle_meta, n_pes, is_c # send/recv counts func_text += " send_counts_char_{} = pre_shuffle_meta.send_counts_char_tup[{}]\n".format(n_str, n_str) func_text += " recv_counts_char_{} = np.empty(n_pes, np.int32)\n".format(n_str) - func_text += (" hpat.distributed_api.alltoall(" + func_text += (" sdc.distributed_api.alltoall(" "send_counts_char_{}, recv_counts_char_{}, 1)\n").format(n_str, n_str) # alloc output func_text += " n_all_chars = recv_counts_char_{}.sum()\n".format(n_str) func_text += " out_arr_{} = pre_alloc_string_array(n_out, n_all_chars)\n".format(i) # send/recv disp - func_text += (" send_disp_char_{} = hpat.hiframes.join." + func_text += (" send_disp_char_{} = sdc.hiframes.join." "calc_disp(send_counts_char_{})\n").format(n_str, n_str) - func_text += (" recv_disp_char_{} = hpat.hiframes.join." + func_text += (" recv_disp_char_{} = sdc.hiframes.join." 
"calc_disp(recv_counts_char_{})\n").format(n_str, n_str) # tmp_offset_char, send_arr_lens @@ -245,14 +245,14 @@ def finalize_shuffle_meta_overload(key_arrs, data, pre_shuffle_meta, n_pes, is_c str_comma) loc_vars = {} - exec(func_text, {'np': np, 'hpat': hpat, + exec(func_text, {'np': np, 'sdc': sdc, 'pre_alloc_string_array': pre_alloc_string_array, 'num_total_chars': num_total_chars, 'get_data_ptr': get_data_ptr, 'ShuffleMeta': ShuffleMeta, 'get_ctypes_ptr': get_ctypes_ptr, 'fix_cat_array_type': - hpat.hiframes.pd_categorical_ext.fix_cat_array_type}, loc_vars) + sdc.hiframes.pd_categorical_ext.fix_cat_array_type}, loc_vars) finalize_impl = loc_vars['f'] return finalize_impl @@ -265,7 +265,7 @@ def alltoallv(arr, m): def alltoallv_impl(arr, metadata): if isinstance(arr, types.Array): def a2av_impl(arr, metadata): - hpat.distributed_api.alltoallv( + sdc.distributed_api.alltoallv( metadata.send_buff, metadata.out_arr, metadata.send_counts, metadata.recv_counts, metadata.send_disp, metadata.recv_disp) return a2av_impl @@ -277,7 +277,7 @@ def a2av_impl(arr, metadata): def a2av_str_impl(arr, metadata): # TODO: increate refcount? 
offset_ptr = get_offset_ptr(metadata.out_arr) - hpat.distributed_api.c_alltoallv( + sdc.distributed_api.c_alltoallv( metadata.send_arr_lens.ctypes, offset_ptr, metadata.send_counts.ctypes, @@ -285,7 +285,7 @@ def a2av_str_impl(arr, metadata): metadata.send_disp.ctypes, metadata.recv_disp.ctypes, int32_typ_enum) - hpat.distributed_api.c_alltoallv( + sdc.distributed_api.c_alltoallv( metadata.send_arr_chars, get_data_ptr( metadata.out_arr), @@ -308,19 +308,19 @@ def alltoallv_tup_overload(arrs, meta): n_str = 0 for i, typ in enumerate(arrs.types): if isinstance(typ, types.Array): - func_text += (" hpat.distributed_api.alltoallv(" + func_text += (" sdc.distributed_api.alltoallv(" "meta.send_buff_tup[{}], meta.out_arr_tup[{}], meta.send_counts," "meta.recv_counts, meta.send_disp, meta.recv_disp)\n").format(i, i) else: assert typ == string_array_type func_text += " offset_ptr_{} = get_offset_ptr(meta.out_arr_tup[{}])\n".format(i, i) - func_text += (" hpat.distributed_api.c_alltoallv(" + func_text += (" sdc.distributed_api.c_alltoallv(" "meta.send_arr_lens_tup[{}].ctypes, offset_ptr_{}, meta.send_counts.ctypes, " "meta.recv_counts.ctypes, meta.send_disp.ctypes, " "meta.recv_disp.ctypes, int32_typ_enum)\n").format(n_str, i) - func_text += (" hpat.distributed_api.c_alltoallv(" + func_text += (" sdc.distributed_api.c_alltoallv(" "meta.send_arr_chars_tup[{}], get_data_ptr(meta.out_arr_tup[{}])," "meta.send_counts_char_tup[{}].ctypes, meta.recv_counts_char_tup[{}].ctypes," "meta.send_disp_char_tup[{}].ctypes, meta.recv_disp_char_tup[{}].ctypes," @@ -336,7 +336,7 @@ def alltoallv_tup_overload(arrs, meta): int32_typ_enum = np.int32(_numba_to_c_type_map[types.int32]) char_typ_enum = np.int32(_numba_to_c_type_map[types.uint8]) loc_vars = {} - exec(func_text, {'hpat': hpat, 'get_offset_ptr': get_offset_ptr, + exec(func_text, {'sdc': sdc, 'get_offset_ptr': get_offset_ptr, 'get_data_ptr': get_data_ptr, 'int32_typ_enum': int32_typ_enum, 'char_typ_enum': char_typ_enum, 
'convert_len_arr_to_offset': convert_len_arr_to_offset}, loc_vars) diff --git a/hpat/str_arr_ext.py b/sdc/str_arr_ext.py similarity index 99% rename from hpat/str_arr_ext.py rename to sdc/str_arr_ext.py index b4b33eb70..3e5ebde21 100644 --- a/hpat/str_arr_ext.py +++ b/sdc/str_arr_ext.py @@ -32,7 +32,7 @@ import operator import numpy as np import numba -import hpat +import sdc from numba import types from numba.typing.templates import (infer_global, AbstractTemplate, infer, signature, AttributeTemplate, infer_getattr, bound_function) @@ -41,7 +41,7 @@ make_attribute_wrapper, lower_builtin, box, unbox, lower_getattr, intrinsic, overload_method, overload, overload_attribute) from numba import cgutils -from hpat.str_ext import string_type +from sdc.str_ext import string_type from numba.targets.imputils import (impl_ret_new_ref, impl_ret_borrowed, iternext_impl, RefType) from numba.targets.hashing import _Py_hash_t @@ -578,7 +578,7 @@ class CmpOpLTStringArray(CmpOpEqStringArray): def is_str_arr_typ(typ): - from hpat.hiframes.pd_series_ext import is_str_series_typ + from sdc.hiframes.pd_series_ext import is_str_series_typ return typ == string_array_type or is_str_series_typ(typ) # @infer_global(len) @@ -1277,7 +1277,7 @@ def str_arr_slice_impl(str_arr, idx): @numba.njit(no_cpython_wrapper=True) def str_arr_item_to_numeric(out_arr, out_ind, str_arr, ind): - return _str_arr_item_to_numeric(hpat.hiframes.split_impl.get_c_arr_ptr( + return _str_arr_item_to_numeric(sdc.hiframes.split_impl.get_c_arr_ptr( out_arr.ctypes, out_ind), str_arr, ind, out_arr.dtype) @@ -1428,7 +1428,7 @@ def append_string_array_to(result, pos, A): for str in A: result[j] = str if str_arr_is_na(A, i): - hpat.str_arr_ext.str_arr_set_na(result, j) + sdc.str_arr_ext.str_arr_set_na(result, j) i += 1 j += 1 diff --git a/hpat/str_ext.py b/sdc/str_ext.py similarity index 99% rename from hpat/str_ext.py rename to sdc/str_ext.py index 235acb9d6..9d1a973d0 100644 --- a/hpat/str_ext.py +++ b/sdc/str_ext.py @@ 
-41,9 +41,9 @@ from numba.typing.templates import (signature, AbstractTemplate, infer, infer_getattr, ConcreteTemplate, AttributeTemplate, bound_function, infer_global) -import hpat +import sdc from . import hstr_ext -# from hpat.utils import unliteral_all +# from sdc.utils import unliteral_all # TODO: resolve import conflict @@ -506,17 +506,17 @@ def gen_std_str_to_unicode(context, builder, std_str_val, del_str=False): kind = numba.unicode.PY_UNICODE_1BYTE_KIND def _std_str_to_unicode(std_str): - length = hpat.str_ext.get_std_str_len(std_str) + length = sdc.str_ext.get_std_str_len(std_str) ret = numba.unicode._empty_string(kind, length) - hpat.str_arr_ext._memcpy( - ret._data, hpat.str_ext.get_c_str(std_str), length, 1) + sdc.str_arr_ext._memcpy( + ret._data, sdc.str_ext.get_c_str(std_str), length, 1) if del_str: - hpat.str_ext.del_str(std_str) + sdc.str_ext.del_str(std_str) return ret val = context.compile_internal( builder, _std_str_to_unicode, - string_type(hpat.str_ext.std_str_type), + string_type(sdc.str_ext.std_str_type), [std_str_val]) return val diff --git a/hpat/stringlib/asciilib.h b/sdc/stringlib/asciilib.h similarity index 100% rename from hpat/stringlib/asciilib.h rename to sdc/stringlib/asciilib.h diff --git a/hpat/stringlib/bytesobject.cpp b/sdc/stringlib/bytesobject.cpp similarity index 100% rename from hpat/stringlib/bytesobject.cpp rename to sdc/stringlib/bytesobject.cpp diff --git a/hpat/stringlib/codecs.h b/sdc/stringlib/codecs.h similarity index 100% rename from hpat/stringlib/codecs.h rename to sdc/stringlib/codecs.h diff --git a/hpat/stringlib/ucs1lib.h b/sdc/stringlib/ucs1lib.h similarity index 100% rename from hpat/stringlib/ucs1lib.h rename to sdc/stringlib/ucs1lib.h diff --git a/hpat/stringlib/ucs2lib.h b/sdc/stringlib/ucs2lib.h similarity index 100% rename from hpat/stringlib/ucs2lib.h rename to sdc/stringlib/ucs2lib.h diff --git a/hpat/stringlib/ucs4lib.h b/sdc/stringlib/ucs4lib.h similarity index 100% rename from 
hpat/stringlib/ucs4lib.h rename to sdc/stringlib/ucs4lib.h diff --git a/hpat/stringlib/undef.h b/sdc/stringlib/undef.h similarity index 100% rename from hpat/stringlib/undef.h rename to sdc/stringlib/undef.h diff --git a/hpat/tests/__init__.py b/sdc/tests/__init__.py similarity index 75% rename from hpat/tests/__init__.py rename to sdc/tests/__init__.py index c7fbf4d51..b65f1e541 100644 --- a/hpat/tests/__init__.py +++ b/sdc/tests/__init__.py @@ -25,24 +25,24 @@ # ***************************************************************************** -from hpat.tests.test_basic import * -from hpat.tests.test_series import * -from hpat.tests.test_dataframe import * -from hpat.tests.test_hiframes import * +from sdc.tests.test_basic import * +from sdc.tests.test_series import * +from sdc.tests.test_dataframe import * +from sdc.tests.test_hiframes import * -# from hpat.tests.test_d4p import * -from hpat.tests.test_date import * -from hpat.tests.test_strings import * +# from sdc.tests.test_d4p import * +from sdc.tests.test_date import * +from sdc.tests.test_strings import * -from hpat.tests.test_groupby import * -from hpat.tests.test_join import * -from hpat.tests.test_rolling import * +from sdc.tests.test_groupby import * +from sdc.tests.test_join import * +from sdc.tests.test_rolling import * -from hpat.tests.test_ml import * +from sdc.tests.test_ml import * -from hpat.tests.test_io import * +from sdc.tests.test_io import * -from hpat.tests.test_hpat_jit import * +from sdc.tests.test_hpat_jit import * # performance tests -import hpat.tests.tests_perf +import sdc.tests.tests_perf diff --git a/hpat/tests/gen_test_data.py b/sdc/tests/gen_test_data.py similarity index 100% rename from hpat/tests/gen_test_data.py rename to sdc/tests/gen_test_data.py diff --git a/hpat/tests/sdf_dt.pq.bz2 b/sdc/tests/sdf_dt.pq.bz2 similarity index 100% rename from hpat/tests/sdf_dt.pq.bz2 rename to sdc/tests/sdf_dt.pq.bz2 diff --git a/hpat/tests/test_basic.py b/sdc/tests/test_basic.py similarity index 
87% rename from hpat/tests/test_basic.py rename to sdc/tests/test_basic.py index d347136e3..4485d680c 100644 --- a/hpat/tests/test_basic.py +++ b/sdc/tests/test_basic.py @@ -30,9 +30,9 @@ import numpy as np import itertools import numba -import hpat +import sdc import random -from hpat.tests.test_utils import (count_array_REPs, count_parfor_REPs, +from sdc.tests.test_utils import (count_array_REPs, count_parfor_REPs, count_parfor_OneDs, count_array_OneDs, count_array_OneD_Vars, dist_IR_contains, get_rank, get_start_end, check_numba_version) @@ -55,18 +55,18 @@ class BaseTest(unittest.TestCase): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.rank = hpat.jit(lambda: hpat.distributed_api.get_rank())() - self.num_ranks = hpat.jit(lambda: hpat.distributed_api.get_size())() + self.rank = sdc.jit(lambda: sdc.distributed_api.get_rank())() + self.num_ranks = sdc.jit(lambda: sdc.distributed_api.get_size())() def _rank_begin(self, arr_len): - f = hpat.jit( - lambda arr_len, num_ranks, rank: hpat.distributed_api.get_start( + f = sdc.jit( + lambda arr_len, num_ranks, rank: sdc.distributed_api.get_start( arr_len, np.int32(num_ranks), np.int32(rank))) return f(arr_len, self.num_ranks, self.rank) def _rank_end(self, arr_len): - f = hpat.jit( - lambda arr_len, num_ranks, rank: hpat.distributed_api.get_end( + f = sdc.jit( + lambda arr_len, num_ranks, rank: sdc.distributed_api.get_end( arr_len, np.int32(num_ranks), np.int32(rank))) return f(arr_len, self.num_ranks, self.rank) @@ -88,7 +88,7 @@ def test_impl(N): C = A[B] return C.sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 128 np.testing.assert_allclose(hpat_func(n), test_impl(n)) self.assertEqual(count_array_REPs(), 0) @@ -100,7 +100,7 @@ def test_impl(N): A[0] = 30 return A.sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 128 np.testing.assert_allclose(hpat_func(n), test_impl(n)) self.assertEqual(count_array_REPs(), 0) @@ -112,7 +112,7 @@ def 
test_impl(N): A[0:4] = 30 return A.sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 128 np.testing.assert_allclose(hpat_func(n), test_impl(n)) self.assertEqual(count_array_REPs(), 0) @@ -122,7 +122,7 @@ def test_astype(self): def test_impl(N): return np.ones(N).astype(np.int32).sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 128 np.testing.assert_allclose(hpat_func(n), test_impl(n)) self.assertEqual(count_array_REPs(), 0) @@ -132,7 +132,7 @@ def test_shape(self): def test_impl(N): return np.ones(N).shape[0] - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 128 np.testing.assert_allclose(hpat_func(n), test_impl(n)) self.assertEqual(count_array_REPs(), 0) @@ -141,7 +141,7 @@ def test_impl(N): # def test_impl(N): # return np.ones((N, 3, 4)).shape # - # hpat_func = hpat.jit(test_impl) + # hpat_func = sdc.jit(test_impl) # n = 128 # np.testing.assert_allclose(hpat_func(n), test_impl(n)) # self.assertEqual(count_array_REPs(), 0) @@ -154,7 +154,7 @@ def test_impl(N): B += A return B.sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 128 np.testing.assert_allclose(hpat_func(n), test_impl(n)) self.assertEqual(count_array_REPs(), 0) @@ -167,7 +167,7 @@ def test_impl(N): C = A[B, 2] return C.sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 128 np.testing.assert_allclose(hpat_func(n), test_impl(n)) self.assertEqual(count_array_REPs(), 0) @@ -179,7 +179,7 @@ def test_impl(N): X[:, 3] = (X[:, 3]) / (np.max(X[:, 3]) - np.min(X[:, 3])) return X.sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 128 np.testing.assert_allclose(hpat_func(n), test_impl(n)) self.assertEqual(count_array_REPs(), 0) @@ -191,7 +191,7 @@ def test_impl(N): B = A[::7] return B.sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 128 np.testing.assert_allclose(hpat_func(n), test_impl(n)) self.assertEqual(count_array_REPs(), 0) @@ 
-203,17 +203,17 @@ def test_assert(self): def g(a): assert a == 0 - hpat_g = hpat.jit(g) + hpat_g = sdc.jit(g) def f(): hpat_g(0) - hpat_f = hpat.jit(f) + hpat_f = sdc.jit(f) hpat_f() def test_inline_locals(self): # make sure locals in inlined function works - @hpat.jit(locals={'B': hpat.float64[:]}) + @sdc.jit(locals={'B': sdc.float64[:]}) def g(S): B = pd.to_numeric(S, errors='coerce') return B @@ -221,7 +221,7 @@ def g(S): def f(): return g(pd.Series(['1.2'])) - pd.testing.assert_series_equal(hpat.jit(f)(), f()) + pd.testing.assert_series_equal(sdc.jit(f)(), f()) def test_reduce(self): import sys @@ -241,7 +241,7 @@ def test_reduce(self): exec(func_text, {'np': np}, loc_vars) test_impl = loc_vars['f'] - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 21 # XXX arange() on float32 has overflow issues on large n np.testing.assert_almost_equal(hpat_func(n), test_impl(n)) self.assertEqual(count_array_REPs(), 0) @@ -264,7 +264,7 @@ def test_reduce2(self): exec(func_text, {'np': np}, loc_vars) test_impl = loc_vars['f'] - hpat_func = hpat.jit(locals={'A:input': 'distributed'})(test_impl) + hpat_func = sdc.jit(locals={'A:input': 'distributed'})(test_impl) n = 21 start, end = get_start_end(n) np.random.seed(0) @@ -292,7 +292,7 @@ def test_reduce_filter1(self): exec(func_text, {'np': np}, loc_vars) test_impl = loc_vars['f'] - hpat_func = hpat.jit(locals={'A:input': 'distributed'})(test_impl) + hpat_func = sdc.jit(locals={'A:input': 'distributed'})(test_impl) n = 21 start, end = get_start_end(n) np.random.seed(0) @@ -318,7 +318,7 @@ def test_array_reduce(self): exec(func_text, {'np': np, 'numba': numba}, loc_vars) test_impl = loc_vars['f'] - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 128 np.testing.assert_allclose(hpat_func(n), test_impl(n)) self.assertEqual(count_array_OneDs(), 0) @@ -331,11 +331,11 @@ def test_impl(N): A = np.arange(N) return A - hpat_func = hpat.jit(locals={'A:return': 'distributed'})(test_impl) + hpat_func 
= sdc.jit(locals={'A:return': 'distributed'})(test_impl) n = 128 - dist_sum = hpat.jit( - lambda a: hpat.distributed_api.dist_reduce( - a, np.int32(hpat.distributed_api.Reduce_Type.Sum.value))) + dist_sum = sdc.jit( + lambda a: sdc.distributed_api.dist_reduce( + a, np.int32(sdc.distributed_api.Reduce_Type.Sum.value))) dist_sum(1) # run to compile np.testing.assert_allclose( dist_sum(hpat_func(n).sum()), test_impl(n).sum()) @@ -350,12 +350,12 @@ def test_impl(N): B = np.arange(N) + 1.5 return A, B - hpat_func = hpat.jit(locals={'A:return': 'distributed', + hpat_func = sdc.jit(locals={'A:return': 'distributed', 'B:return': 'distributed'})(test_impl) n = 128 - dist_sum = hpat.jit( - lambda a: hpat.distributed_api.dist_reduce( - a, np.int32(hpat.distributed_api.Reduce_Type.Sum.value))) + dist_sum = sdc.jit( + lambda a: sdc.distributed_api.dist_reduce( + a, np.int32(sdc.distributed_api.Reduce_Type.Sum.value))) dist_sum(1.0) # run to compile np.testing.assert_allclose( dist_sum((hpat_func(n)[0] + hpat_func(n)[1]).sum()), (test_impl(n)[0] + test_impl(n)[1]).sum()) @@ -366,7 +366,7 @@ def test_dist_input(self): def test_impl(A): return len(A) - hpat_func = hpat.jit(distributed=['A'])(test_impl) + hpat_func = sdc.jit(distributed=['A'])(test_impl) n = 128 arr = np.ones(n) np.testing.assert_allclose(hpat_func(arr) / self.num_ranks, test_impl(arr)) @@ -378,18 +378,18 @@ def test_rebalance(self): def test_impl(N): A = np.arange(n) B = A[A > 10] - C = hpat.distributed_api.rebalance_array(B) + C = sdc.distributed_api.rebalance_array(B) return C.sum() try: - hpat.distributed_analysis.auto_rebalance = True - hpat_func = hpat.jit(test_impl) + sdc.distributed_analysis.auto_rebalance = True + hpat_func = sdc.jit(test_impl) n = 128 np.testing.assert_allclose(hpat_func(n), test_impl(n)) self.assertEqual(count_array_OneDs(), 3) self.assertEqual(count_parfor_OneDs(), 2) finally: - hpat.distributed_analysis.auto_rebalance = False + sdc.distributed_analysis.auto_rebalance = False 
@unittest.skipIf(check_numba_version('0.46.0'), "Broken in numba 0.46.0. https://github.com/numba/numba/issues/4690") @@ -403,15 +403,15 @@ def test_impl(N): return s try: - hpat.distributed_analysis.auto_rebalance = True - hpat_func = hpat.jit(test_impl) + sdc.distributed_analysis.auto_rebalance = True + hpat_func = sdc.jit(test_impl) n = 128 np.testing.assert_allclose(hpat_func(n), test_impl(n)) self.assertEqual(count_array_OneDs(), 4) self.assertEqual(count_parfor_OneDs(), 2) self.assertIn('allgather', list(hpat_func.inspect_llvm().values())[0]) finally: - hpat.distributed_analysis.auto_rebalance = False + sdc.distributed_analysis.auto_rebalance = False def test_transpose(self): def test_impl(n): @@ -420,7 +420,7 @@ def test_impl(n): C = A.transpose(0, 2, 1) return B.sum() + C.sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 128 np.testing.assert_allclose(hpat_func(n), test_impl(n)) self.assertEqual(count_array_REPs(), 0) @@ -462,7 +462,7 @@ def python_one_dim(arr_len, r): # details please see https://github.com/numba/numba/issues/2782. r = self._follow_cpython(get_np_state_ptr()) - hpat_func1 = hpat.jit(locals={'A:return': 'distributed', + hpat_func1 = sdc.jit(locals={'A:return': 'distributed', 'B:return': 'distributed'})(test_one_dim) # Test one-dimensional array indexing. 
@@ -492,7 +492,7 @@ def python_two_dim(arr_len, r): A, B = A[P], B[P] return A, B - hpat_func2 = hpat.jit(locals={'A:return': 'distributed', + hpat_func2 = sdc.jit(locals={'A:return': 'distributed', 'B:return': 'distributed'})(test_two_dim) for arr_len in [18, 66, 128]: @@ -511,7 +511,7 @@ def test_rhs(arr_len): C = A[P] return A, B, C - hpat_func3 = hpat.jit(locals={'A:return': 'distributed', + hpat_func3 = sdc.jit(locals={'A:return': 'distributed', 'B:return': 'distributed', 'C:return': 'distributed'})(test_rhs) diff --git a/hpat/tests/test_d4p.py b/sdc/tests/test_d4p.py similarity index 94% rename from hpat/tests/test_d4p.py rename to sdc/tests/test_d4p.py index 9f4758dbb..9d85f191d 100644 --- a/hpat/tests/test_d4p.py +++ b/sdc/tests/test_d4p.py @@ -35,8 +35,8 @@ import numpy as np from math import sqrt import numba - import hpat - from hpat.tests.test_utils import (count_array_REPs, count_parfor_REPs, + import sdc + from sdc.tests.test_utils import (count_array_REPs, count_parfor_REPs, count_parfor_OneDs, count_array_OneDs, count_parfor_OneD_Vars, count_array_OneD_Vars, dist_IR_contains) @@ -65,8 +65,8 @@ def prdct_impl(n, d, model): ) return algo.compute(w, model) - train_hpat = hpat.jit(train_impl) - prdct_hpat = hpat.jit(prdct_impl) + train_hpat = sdc.jit(train_impl) + prdct_hpat = sdc.jit(prdct_impl) n = 11 d = 4 pred_impl = prdct_impl(n, d, train_impl(n, d).model).prediction diff --git a/hpat/tests/test_dataframe.py b/sdc/tests/test_dataframe.py similarity index 88% rename from hpat/tests/test_dataframe.py rename to sdc/tests/test_dataframe.py index 8525eed3e..cf9ab680f 100644 --- a/hpat/tests/test_dataframe.py +++ b/sdc/tests/test_dataframe.py @@ -34,15 +34,15 @@ import numpy as np import numba -import hpat -from hpat.tests.test_utils import (count_array_REPs, count_parfor_REPs, count_parfor_OneDs, +import sdc +from sdc.tests.test_utils import (count_array_REPs, count_parfor_REPs, count_parfor_OneDs, count_array_OneDs, dist_IR_contains, get_start_end, 
check_numba_version) -from hpat.tests.gen_test_data import ParquetGenerator +from sdc.tests.gen_test_data import ParquetGenerator from numba.config import IS_32BITS -@hpat.jit +@sdc.jit def inner_get_column(df): # df2 = df[['A', 'C']] # df2['D'] = np.ones(3) @@ -59,7 +59,7 @@ def test_impl(n): df = pd.DataFrame({'A': np.ones(n), 'B': np.random.ranf(n)}) return df.A - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 pd.testing.assert_series_equal(hpat_func(n), test_impl(n)) @@ -71,7 +71,7 @@ def test_impl(A, B, c): df = pd.DataFrame({'A': B}) return df.A - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 A = np.ones(n) B = np.arange(n) + 1.0 @@ -86,14 +86,14 @@ def test_impl(): df = pd.DataFrame([100, 200, 300, 400, 200, 100]) return df - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) pd.testing.assert_frame_equal(hpat_func(), test_impl()) def test_unbox1(self): def test_impl(df): return df.A - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 df = pd.DataFrame({'A': np.arange(n), 'B': np.random.ranf(n)}) pd.testing.assert_series_equal(hpat_func(df), test_impl(df)) @@ -106,7 +106,7 @@ def test_impl(df, cond): df['A'] = np.arange(n) + 2.0 return df.A - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 df = pd.DataFrame({'A': np.ones(n), 'B': np.random.ranf(n)}) pd.testing.assert_series_equal(hpat_func(df.copy(), True), test_impl(df.copy(), True)) @@ -118,7 +118,7 @@ def test_impl(df): return df df = pd.DataFrame([100, 200, 300, 400, 200, 100]) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) pd.testing.assert_frame_equal(hpat_func(df), test_impl(df)) def test_box1(self): @@ -126,7 +126,7 @@ def test_impl(n): df = pd.DataFrame({'A': np.ones(n), 'B': np.arange(n)}) return df - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 do_check = False if platform.system() == 'Windows' and not IS_32BITS else True 
pd.testing.assert_frame_equal(hpat_func(n), test_impl(n), check_dtype=do_check) @@ -136,7 +136,7 @@ def test_impl(): df = pd.DataFrame({'A': [1, 2, 3], 'B': ['a', 'bb', 'ccc']}) return df - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) pd.testing.assert_frame_equal(hpat_func(), test_impl()) @unittest.skip("pending df filter support") @@ -145,7 +145,7 @@ def test_impl(df): df = df[df.A != 'dd'] return df - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df = pd.DataFrame({'A': ['aa', 'bb', 'cc']}) pd.testing.assert_frame_equal(hpat_func(df), test_impl(df)) @@ -154,7 +154,7 @@ def test_impl(df): df['A'] = df['A'] + 1 return df - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df = pd.DataFrame({'A': [1, 2, 3], 'B': pd.Series(['N', 'Y', 'Y'], dtype=pd.api.types.CategoricalDtype(['N', 'Y']))}) @@ -167,14 +167,14 @@ def test_impl(n): df = pd.DataFrame({'A': np.ones(n), 'B': np.arange(n)}) return df - hpat_func = hpat.jit(distributed={'df'})(test_impl) + hpat_func = sdc.jit(distributed={'df'})(test_impl) n = 11 hres, res = hpat_func(n), test_impl(n) self.assertEqual(count_array_OneDs(), 3) self.assertEqual(count_parfor_OneDs(), 2) - dist_sum = hpat.jit( - lambda a: hpat.distributed_api.dist_reduce( - a, np.int32(hpat.distributed_api.Reduce_Type.Sum.value))) + dist_sum = sdc.jit( + lambda a: sdc.distributed_api.dist_reduce( + a, np.int32(sdc.distributed_api.Reduce_Type.Sum.value))) dist_sum(1) # run to compile np.testing.assert_allclose(dist_sum(hres.A.sum()), res.A.sum()) np.testing.assert_allclose(dist_sum(hres.B.sum()), res.B.sum()) @@ -184,7 +184,7 @@ def test_impl(n): df = pd.DataFrame({'A': np.ones(n, np.int64), 'B': np.random.ranf(n)}) return len(df) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 self.assertEqual(hpat_func(n), test_impl(n)) self.assertEqual(count_array_REPs(), 0) @@ -195,7 +195,7 @@ def test_impl(n): df = pd.DataFrame({'A': np.ones(n, np.int64), 'B': np.random.ranf(n)}) 
return df.shape - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 self.assertEqual(hpat_func(n), test_impl(n)) self.assertEqual(count_array_REPs(), 0) @@ -207,7 +207,7 @@ def test_impl(n): Ac = df['A'].values return Ac.sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 self.assertEqual(hpat_func(n), test_impl(n)) self.assertEqual(count_array_REPs(), 0) @@ -218,7 +218,7 @@ def test_column_list_getitem1(self): def test_impl(df): return df[['A', 'C']] - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 df = pd.DataFrame( {'A': np.arange(n), 'B': np.ones(n), 'C': np.random.ranf(n)}) @@ -230,7 +230,7 @@ def test_impl(n): df1 = df[df.A > .5] return df1.B.sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 self.assertEqual(hpat_func(n), test_impl(n)) self.assertEqual(count_array_REPs(), 0) @@ -242,7 +242,7 @@ def test_impl(n): df1 = df.loc[df.A > .5] return np.sum(df1.B) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 self.assertEqual(hpat_func(n), test_impl(n)) self.assertEqual(count_array_REPs(), 0) @@ -254,7 +254,7 @@ def test_impl(n): df1 = df.iloc[(df.A > .5).values] return np.sum(df1.B) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 self.assertEqual(hpat_func(n), test_impl(n)) self.assertEqual(count_array_REPs(), 0) @@ -264,7 +264,7 @@ def test_iloc1(self): def test_impl(df, n): return df.iloc[1:n].B.values - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 df = pd.DataFrame({'A': np.arange(n), 'B': np.arange(n)**2}) np.testing.assert_array_equal(hpat_func(df, n), test_impl(df, n)) @@ -273,7 +273,7 @@ def test_iloc2(self): def test_impl(df, n): return df.iloc[np.array([1, 4, 9])].B.values - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 df = pd.DataFrame({'A': np.arange(n), 'B': np.arange(n)**2}) np.testing.assert_array_equal(hpat_func(df, n), test_impl(df, n)) @@ 
-282,7 +282,7 @@ def test_iloc3(self): def test_impl(df): return df.iloc[:, 1].values - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 df = pd.DataFrame({'A': np.arange(n), 'B': np.arange(n)**2}) np.testing.assert_array_equal(hpat_func(df), test_impl(df)) @@ -292,7 +292,7 @@ def test_iloc4(self): def test_impl(df, n): return df.iloc[[1, 4, 9]].B.values - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 df = pd.DataFrame({'A': np.arange(n), 'B': np.arange(n)**2}) np.testing.assert_array_equal(hpat_func(df, n), test_impl(df, n)) @@ -302,7 +302,7 @@ def test_iloc5(self): def test_impl(df): return df.iloc[:, COL_IND].values - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 df = pd.DataFrame({'A': np.arange(n), 'B': np.arange(n)**2}) np.testing.assert_array_equal(hpat_func(df), test_impl(df)) @@ -311,7 +311,7 @@ def test_loc1(self): def test_impl(df): return df.loc[:, 'B'].values - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 df = pd.DataFrame({'A': np.arange(n), 'B': np.arange(n)**2}) np.testing.assert_array_equal(hpat_func(df), test_impl(df)) @@ -320,14 +320,14 @@ def test_iat1(self): def test_impl(n): df = pd.DataFrame({'B': np.ones(n), 'A': np.arange(n) + n}) return df.iat[3, 1] - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 self.assertEqual(hpat_func(n), test_impl(n)) def test_iat2(self): def test_impl(df): return df.iat[3, 1] - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 df = pd.DataFrame({'B': np.ones(n), 'A': np.arange(n) + n}) self.assertEqual(hpat_func(df), test_impl(df)) @@ -335,7 +335,7 @@ def test_impl(df): def test_iat3(self): def test_impl(df, n): return df.iat[n - 1, 1] - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 df = pd.DataFrame({'B': np.ones(n), 'A': np.arange(n) + n}) self.assertEqual(hpat_func(df, n), test_impl(df, n)) @@ -344,7 +344,7 @@ def test_iat_set1(self): def 
test_impl(df, n): df.iat[n - 1, 1] = n**2 return df.A # return the column to check column aliasing - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 df = pd.DataFrame({'B': np.ones(n), 'A': np.arange(n) + n}) df2 = df.copy() @@ -354,7 +354,7 @@ def test_iat_set2(self): def test_impl(df, n): df.iat[n - 1, 1] = n**2 return df # check df aliasing/boxing - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 df = pd.DataFrame({'B': np.ones(n), 'A': np.arange(n) + n}) df2 = df.copy() @@ -367,7 +367,7 @@ def test_impl(n): df['A'] = np.arange(n) return df - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 do_check = False if platform.system() == 'Windows' and not IS_32BITS else True pd.testing.assert_frame_equal(hpat_func(n), test_impl(n), check_dtype=do_check) @@ -377,7 +377,7 @@ def test_set_column_reflect4(self): def test_impl(df, n): df['A'] = np.arange(n) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 df1 = pd.DataFrame({'A': np.ones(n, np.int64), 'B': np.arange(n) + 3.0}) df2 = df1.copy() @@ -393,7 +393,7 @@ def test_impl(n): df['A'] = np.arange(n) return df - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 do_check = False if platform.system() == 'Windows' and not IS_32BITS else True pd.testing.assert_frame_equal(hpat_func(n), test_impl(n), check_dtype=do_check) @@ -405,7 +405,7 @@ def test_impl(n): df['C'] = np.arange(n) return df - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 do_check = False if platform.system() == 'Windows' and not IS_32BITS else True pd.testing.assert_frame_equal(hpat_func(n), test_impl(n), check_dtype=do_check) @@ -415,7 +415,7 @@ def test_set_column_reflect3(self): def test_impl(df, n): df['C'] = np.arange(n) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 df1 = pd.DataFrame({'A': np.ones(n, np.int64), 'B': np.arange(n) + 3.0}) df2 = df1.copy() @@ -428,7 +428,7 @@ def 
test_set_column_bool1(self): def test_impl(df): df['C'] = df['A'][df['B']] - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df = pd.DataFrame({'A': [1, 2, 3], 'B': [True, False, True]}) df2 = df.copy() test_impl(df2) @@ -440,7 +440,7 @@ def test_impl(df, arr): df['C'] = arr return df.C.sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 arr = np.random.ranf(n) df = pd.DataFrame({'A': np.ones(n), 'B': np.random.ranf(n)}) @@ -453,7 +453,7 @@ def test_impl(df, arr): df['C'] = arr return df.C.sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 arr = np.random.ranf(n) df = pd.DataFrame({'A': np.ones(n), 'B': np.random.ranf(n)}) @@ -465,7 +465,7 @@ def test_impl(n): df = pd.DataFrame({'A': np.ones(n), 'B': np.arange(n)}) return df.values - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 np.testing.assert_array_equal(hpat_func(n), test_impl(n)) @@ -473,7 +473,7 @@ def test_df_values2(self): def test_impl(df): return df.values - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 df = pd.DataFrame({'A': np.ones(n), 'B': np.arange(n)}) np.testing.assert_array_equal(hpat_func(df), test_impl(df)) @@ -483,7 +483,7 @@ def test_impl(n): df = pd.DataFrame({'A': np.ones(n), 'B': np.arange(n)}) return df.values.sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 np.testing.assert_array_equal(hpat_func(n), test_impl(n)) self.assertEqual(count_array_REPs(), 0) @@ -496,7 +496,7 @@ def test_impl(n): return df.B.sum() n = 121 - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) np.testing.assert_almost_equal(hpat_func(n), test_impl(n)) def test_df_apply_branch(self): @@ -506,7 +506,7 @@ def test_impl(n): return df.B.sum() n = 121 - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) np.testing.assert_almost_equal(hpat_func(n), test_impl(n)) def test_df_describe(self): @@ -516,7 +516,7 @@ def test_impl(n): #df.A[0:1] 
= np.nan return df.describe() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 1001 hpat_func(n) # XXX: test actual output @@ -531,7 +531,7 @@ def test_impl(df): n = 1211 np.random.seed(2) df = pd.DataFrame({'A': np.random.ranf(n), 'B': np.arange(n), 'C': np.random.ranf(n)}) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) np.testing.assert_almost_equal(hpat_func(df.copy()), test_impl(df)) def test_sort_values_copy(self): @@ -542,7 +542,7 @@ def test_impl(df): n = 1211 np.random.seed(2) df = pd.DataFrame({'A': np.random.ranf(n), 'B': np.arange(n), 'C': np.random.ranf(n)}) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) np.testing.assert_almost_equal(hpat_func(df.copy()), test_impl(df)) def test_sort_values_single_col(self): @@ -553,7 +553,7 @@ def test_impl(df): n = 1211 np.random.seed(2) df = pd.DataFrame({'A': np.random.ranf(n)}) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) np.testing.assert_almost_equal(hpat_func(df.copy()), test_impl(df)) def test_sort_values_single_col_str(self): @@ -570,7 +570,7 @@ def test_impl(df): val = ''.join(random.choices(string.ascii_uppercase + string.digits, k=k)) str_vals.append(val) df = pd.DataFrame({'A': str_vals}) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) self.assertTrue((hpat_func(df.copy()) == test_impl(df)).all()) def test_sort_values_str(self): @@ -593,7 +593,7 @@ def test_impl(df): df = pd.DataFrame({'A': str_vals, 'B': str_vals2}) # use mergesort for stability, in str generation equal keys are more probable sorted_df = df.sort_values('A', inplace=False, kind='mergesort') - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) self.assertTrue((hpat_func(df) == sorted_df.B.values).all()) def test_sort_parallel_single_col(self): @@ -607,22 +607,22 @@ def test_impl(): res = df.points.values return res - hpat_func = hpat.jit(locals={'res:return': 'distributed'})(test_impl) + hpat_func = 
sdc.jit(locals={'res:return': 'distributed'})(test_impl) - save_min_samples = hpat.hiframes.sort.MIN_SAMPLES + save_min_samples = sdc.hiframes.sort.MIN_SAMPLES try: - hpat.hiframes.sort.MIN_SAMPLES = 10 + sdc.hiframes.sort.MIN_SAMPLES = 10 res = hpat_func() self.assertTrue((np.diff(res) >= 0).all()) finally: # restore global val - hpat.hiframes.sort.MIN_SAMPLES = save_min_samples + sdc.hiframes.sort.MIN_SAMPLES = save_min_samples def test_df_isna1(self): '''Verify DataFrame.isna implementation for various types of data''' def test_impl(df): return df.isna() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) # TODO: add column with datetime values when test_series_datetime_isna1 is fixed df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0], @@ -635,7 +635,7 @@ def test_df_astype_str1(self): '''Verifies DataFrame.astype implementation converting various types to string''' def test_impl(df): return df.astype(str) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) # TODO: add column with float values when test_series_astype_float_to_str1 is fixed df = pd.DataFrame({'A': [-1, 2, 11, 5, 0, -7], @@ -647,7 +647,7 @@ def test_df_astype_float1(self): '''Verifies DataFrame.astype implementation converting various types to float''' def test_impl(df): return df.astype(np.float64) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) # TODO: uncomment column with string values when test_series_astype_str_to_float64 is fixed df = pd.DataFrame({'A': [-1, 2, 11, 5, 0, -7], @@ -660,7 +660,7 @@ def test_df_astype_int1(self): '''Verifies DataFrame.astype implementation converting various types to int''' def test_impl(df): return df.astype(np.int32) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 6 # TODO: uncomment column with string values when test_series_astype_str_to_int32 is fixed @@ -683,16 +683,16 @@ def test_impl(): res = df.A.values return res - hpat_func = hpat.jit(locals={'res:return': 'distributed'})(test_impl) 
+ hpat_func = sdc.jit(locals={'res:return': 'distributed'})(test_impl) - save_min_samples = hpat.hiframes.sort.MIN_SAMPLES + save_min_samples = sdc.hiframes.sort.MIN_SAMPLES try: - hpat.hiframes.sort.MIN_SAMPLES = 10 + sdc.hiframes.sort.MIN_SAMPLES = 10 res = hpat_func() self.assertTrue((np.diff(res) >= 0).all()) finally: # restore global val - hpat.hiframes.sort.MIN_SAMPLES = save_min_samples + sdc.hiframes.sort.MIN_SAMPLES = save_min_samples def test_itertuples(self): def test_impl(df): @@ -701,7 +701,7 @@ def test_impl(df): res += r[1] return res - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 df = pd.DataFrame({'A': np.arange(n), 'B': np.ones(n, np.int64)}) self.assertEqual(hpat_func(df), test_impl(df)) @@ -713,7 +713,7 @@ def test_impl(df): res += r[1] return res - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 3 df = pd.DataFrame({'A': ['aa', 'bb', 'cc'], 'B': np.ones(n, np.int64)}) self.assertEqual(hpat_func(df), test_impl(df)) @@ -726,7 +726,7 @@ def test_impl(n): res += r[1] return res - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 self.assertEqual(hpat_func(n), test_impl(n)) @@ -743,7 +743,7 @@ def test_impl(n): res += len(A) return res - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 self.assertEqual(hpat_func(n), test_impl(n)) @@ -753,7 +753,7 @@ def test_impl(n): df = pd.DataFrame({'A': np.ones(n), 'B': np.arange(n)}) return df.head(3) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 pd.testing.assert_frame_equal(hpat_func(n), test_impl(n)) @@ -762,7 +762,7 @@ def test_impl(n): df = pd.DataFrame({'A': np.arange(n) + 1.0, 'B': np.arange(n) + 1}) return df.pct_change(3) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 pd.testing.assert_frame_equal(hpat_func(n), test_impl(n)) @@ -772,7 +772,7 @@ def test_impl(n): df = pd.DataFrame({'A': np.arange(n) + 1.0, 'B': np.arange(n) + 1}) return df.mean() - hpat_func 
= hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 pd.testing.assert_series_equal(hpat_func(n), test_impl(n)) @@ -782,7 +782,7 @@ def test_impl(n): df = pd.DataFrame({'A': 2 ** np.arange(n), 'B': np.arange(n) + 1.0}) return df.median() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 pd.testing.assert_series_equal(hpat_func(n), test_impl(n)) @@ -792,7 +792,7 @@ def test_impl(n): df = pd.DataFrame({'A': np.arange(n) + 1.0, 'B': np.arange(n) + 1}) return df.std() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 pd.testing.assert_series_equal(hpat_func(n), test_impl(n)) @@ -802,7 +802,7 @@ def test_impl(n): df = pd.DataFrame({'A': np.arange(n) + 1.0, 'B': np.arange(n) + 1}) return df.var() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 pd.testing.assert_series_equal(hpat_func(n), test_impl(n)) @@ -812,7 +812,7 @@ def test_impl(n): df = pd.DataFrame({'A': np.arange(n) + 1.0, 'B': np.arange(n) + 1}) return df.max() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 pd.testing.assert_series_equal(hpat_func(n), test_impl(n)) @@ -822,18 +822,18 @@ def test_impl(n): df = pd.DataFrame({'A': np.arange(n) + 1.0, 'B': np.arange(n) + 1}) return df.min() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 pd.testing.assert_series_equal(hpat_func(n), test_impl(n)) - @unittest.skipIf(not hpat.config.config_pipeline_hpat_default, "DataFrame.sum() not implemented in new style") + @unittest.skipIf(not sdc.config.config_pipeline_hpat_default, "DataFrame.sum() not implemented in new style") def test_sum1(self): # TODO: non-numeric columns should be ignored automatically def test_impl(n): df = pd.DataFrame({'A': np.arange(n) + 1.0, 'B': np.arange(n) + 1}) return df.sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 pd.testing.assert_series_equal(hpat_func(n), test_impl(n)) @@ -843,7 +843,7 @@ def test_impl(n): df = 
pd.DataFrame({'A': np.arange(n) + 1.0, 'B': np.arange(n) + 1}) return df.prod() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 pd.testing.assert_series_equal(hpat_func(n), test_impl(n)) @@ -852,7 +852,7 @@ def test_impl(n): df = pd.DataFrame({'A': np.arange(n), 'B': np.arange(n)}) return df.count() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 pd.testing.assert_series_equal(hpat_func(n), test_impl(n)) @@ -862,7 +862,7 @@ def test_impl(n): df = pd.DataFrame({'A': np.arange(n) + 1.0, 'B': np.arange(n) + 1}) return df.count() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 pd.testing.assert_series_equal(hpat_func(n), test_impl(n)) @@ -871,7 +871,7 @@ def test_impl(df): return df.fillna(5.0) df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0]}) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) pd.testing.assert_frame_equal(hpat_func(df), test_impl(df)) def test_df_fillna_str1(self): @@ -879,7 +879,7 @@ def test_impl(df): return df.fillna("dd") df = pd.DataFrame({'A': ['aa', 'b', None, 'ccc']}) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) pd.testing.assert_frame_equal(hpat_func(df), test_impl(df)) def test_df_fillna_inplace1(self): @@ -889,7 +889,7 @@ def test_impl(A): df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0]}) df2 = df.copy() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) pd.testing.assert_frame_equal(hpat_func(df), test_impl(df2)) def test_df_reset_index1(self): @@ -897,7 +897,7 @@ def test_impl(df): return df.reset_index(drop=True) df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0]}) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) pd.testing.assert_frame_equal(hpat_func(df), test_impl(df)) def test_df_reset_index_inplace1(self): @@ -906,7 +906,7 @@ def test_impl(): df.reset_index(drop=True, inplace=True) return df - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) 
pd.testing.assert_frame_equal(hpat_func(), test_impl()) def test_df_dropna1(self): @@ -914,7 +914,7 @@ def test_impl(df): return df.dropna() df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0], 'B': [4, 5, 6, 7]}) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) out = test_impl(df).reset_index(drop=True) h_out = hpat_func(df) pd.testing.assert_frame_equal(out, h_out) @@ -924,7 +924,7 @@ def test_impl(df): return df.dropna() df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0]}) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) out = test_impl(df).reset_index(drop=True) h_out = hpat_func(df) pd.testing.assert_frame_equal(out, h_out) @@ -937,7 +937,7 @@ def test_impl(df): df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0], 'B': [4, 5, 6, 7]}) df2 = df.copy() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) out = test_impl(df).reset_index(drop=True) h_out = hpat_func(df2) pd.testing.assert_frame_equal(out, h_out) @@ -947,7 +947,7 @@ def test_impl(df): return df.dropna() df = pd.DataFrame({'A': [1.0, 2.0, 4.0, 1.0], 'B': ['aa', 'b', None, 'ccc']}) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) out = test_impl(df).reset_index(drop=True) h_out = hpat_func(df) pd.testing.assert_frame_equal(out, h_out) @@ -957,7 +957,7 @@ def test_impl(df): return df.drop(columns=['A']) df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0], 'B': [4, 5, 6, 7]}) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) pd.testing.assert_frame_equal(hpat_func(df), test_impl(df)) def test_df_drop_inplace2(self): @@ -969,7 +969,7 @@ def test_impl(df): return df2 df = pd.DataFrame({'A': [1, 2, 3], 'B': [2, 3, 4]}) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) pd.testing.assert_frame_equal(hpat_func(df), test_impl(df)) def test_df_drop_inplace1(self): @@ -979,14 +979,14 @@ def test_impl(df): df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0], 'B': [4, 5, 6, 7]}) df2 = df.copy() - hpat_func = hpat.jit(test_impl) + 
hpat_func = sdc.jit(test_impl) pd.testing.assert_frame_equal(hpat_func(df), test_impl(df2)) def test_isin_df1(self): def test_impl(df, df2): return df.isin(df2) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 df = pd.DataFrame({'A': np.arange(n), 'B': np.arange(n)**2}) df2 = pd.DataFrame({'A': np.arange(n), 'C': np.arange(n)**2}) @@ -999,7 +999,7 @@ def test_impl(df): vals = {'A': [2, 3, 4], 'C': [4, 5, 6]} return df.isin(vals) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 df = pd.DataFrame({'A': np.arange(n), 'B': np.arange(n)**2}) pd.testing.assert_frame_equal(hpat_func(df), test_impl(df)) @@ -1009,7 +1009,7 @@ def test_impl(df): vals = [2, 3, 4] return df.isin(vals) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 df = pd.DataFrame({'A': np.arange(n), 'B': np.arange(n)**2}) pd.testing.assert_frame_equal(hpat_func(df), test_impl(df)) @@ -1018,7 +1018,7 @@ def test_append1(self): def test_impl(df, df2): return df.append(df2, ignore_index=True) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 df = pd.DataFrame({'A': np.arange(n), 'B': np.arange(n)**2}) df2 = pd.DataFrame({'A': np.arange(n), 'C': np.arange(n)**2}) @@ -1029,7 +1029,7 @@ def test_append2(self): def test_impl(df, df2, df3): return df.append([df2, df3], ignore_index=True) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 df = pd.DataFrame({'A': np.arange(n), 'B': np.arange(n)**2}) df2 = pd.DataFrame({'A': np.arange(n), 'B': np.arange(n)**2}) @@ -1042,7 +1042,7 @@ def test_concat_columns1(self): def test_impl(S1, S2): return pd.concat([S1, S2], axis=1) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) S1 = pd.Series([4, 5]) S2 = pd.Series([6., 7.]) # TODO: support int as column name @@ -1060,7 +1060,7 @@ def test_impl(): df['C'] = np.ones(3) return inner_get_column(df) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) 
pd.testing.assert_series_equal(hpat_func(), test_impl(), check_names=False) @unittest.skip("Implement getting columns attribute") @@ -1069,7 +1069,7 @@ def test_impl(): df = pd.DataFrame({'A': [1, 2, 3], 'B': [2, 3, 4]}) return df.columns - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) np.testing.assert_array_equal(hpat_func(), test_impl()) @unittest.skip("Implement getting columns attribute") @@ -1078,7 +1078,7 @@ def test_impl(): df = pd.DataFrame({'A': [1, 2, 3], 'B': [2, 3, 4]}) return [column for column in df.columns] - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) np.testing.assert_array_equal(hpat_func(), test_impl()) @unittest.skip("Implement set_index for DataFrame") @@ -1089,7 +1089,7 @@ def test_impl(): 'sale': [55, 40, 84, 31]}) return df.set_index('month') - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) pd.testing.assert_frame_equal(hpat_func(), test_impl()) @unittest.skip("Implement sort_index for DataFrame") @@ -1098,7 +1098,7 @@ def test_impl(): df = pd.DataFrame({'A': [1, 2, 3, 4, 5]}, index=[100, 29, 234, 1, 150]) return df.sort_index() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) pd.testing.assert_frame_equal(hpat_func(), test_impl()) @unittest.skip("Implement iterrows for DataFrame") @@ -1108,7 +1108,7 @@ def test_impl(df): return [row for _, row in df.iterrows()] df = pd.DataFrame({'A': [1, 2, 3], 'B': [0.2, 0.5, 0.001], 'C': ['a', 'bb', 'ccc']}) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) np.testing.assert_array_equal(hpat_func(df), test_impl(df)) @unittest.skip("Support parameter axis=1") @@ -1118,7 +1118,7 @@ def test_impl(n): return df.sum(axis=1) n = 100 - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) pd.testing.assert_series_equal(hpat_func(n), test_impl(n)) diff --git a/hpat/tests/test_date.py b/sdc/tests/test_date.py similarity index 87% rename from hpat/tests/test_date.py rename to sdc/tests/test_date.py index 
7cfebe18f..48b4c2a71 100644 --- a/hpat/tests/test_date.py +++ b/sdc/tests/test_date.py @@ -30,8 +30,8 @@ import numpy as np from math import sqrt import numba -import hpat -from hpat.tests.test_utils import (count_array_REPs, count_parfor_REPs, +import sdc +from sdc.tests.test_utils import (count_array_REPs, count_parfor_REPs, count_parfor_OneDs, count_array_OneDs, count_parfor_OneD_Vars, count_array_OneD_Vars, dist_IR_contains) @@ -45,7 +45,7 @@ def test_datetime_index_in(self): def test_impl(dti): return dti - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df = self._gen_str_date_df() dti = pd.DatetimeIndex(df['str_date']) np.testing.assert_array_equal(hpat_func(dti).values, test_impl(dti).values) @@ -54,7 +54,7 @@ def test_datetime_index(self): def test_impl(df): return pd.DatetimeIndex(df['str_date']).values - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df = self._gen_str_date_df() np.testing.assert_array_equal(hpat_func(df), test_impl(df)) @@ -62,7 +62,7 @@ def test_datetime_index_kw(self): def test_impl(df): return pd.DatetimeIndex(data=df['str_date']).values - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df = self._gen_str_date_df() np.testing.assert_array_equal(hpat_func(df), test_impl(df)) @@ -70,7 +70,7 @@ def test_datetime_arg(self): def test_impl(A): return A - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df = self._gen_str_date_df() A = pd.DatetimeIndex(df['str_date']).to_series() np.testing.assert_array_equal(hpat_func(A), test_impl(A)) @@ -79,7 +79,7 @@ def test_datetime_getitem(self): def test_impl(A): return A[0] - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df = self._gen_str_date_df() A = pd.DatetimeIndex(df['str_date']).to_series() self.assertEqual(hpat_func(A), test_impl(A)) @@ -88,7 +88,7 @@ def test_ts_map(self): def test_impl(A): return A.map(lambda x: x.hour) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df = 
self._gen_str_date_df() A = pd.DatetimeIndex(df['str_date']).to_series() np.testing.assert_array_equal(hpat_func(A), test_impl(A)) @@ -97,7 +97,7 @@ def test_ts_map_date(self): def test_impl(A): return A.map(lambda x: x.date())[0] - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df = self._gen_str_date_df() A = pd.DatetimeIndex(df['str_date']).to_series() np.testing.assert_array_equal(hpat_func(A), test_impl(A)) @@ -106,7 +106,7 @@ def test_ts_map_date2(self): def test_impl(df): return df.apply(lambda row: row.dt_ind.date(), axis=1)[0] - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df = self._gen_str_date_df() df['dt_ind'] = pd.DatetimeIndex(df['str_date']) np.testing.assert_array_equal(hpat_func(df), test_impl(df)) @@ -115,7 +115,7 @@ def test_ts_map_date_set(self): def test_impl(df): df['hpat_date'] = df.dt_ind.map(lambda x: x.date()) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df = self._gen_str_date_df() df['dt_ind'] = pd.DatetimeIndex(df['str_date']) hpat_func(df) @@ -126,7 +126,7 @@ def test_date_series_unbox(self): def test_impl(A): return A[0] - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df = self._gen_str_date_df() A = pd.DatetimeIndex(df['str_date']).to_series().map(lambda x: x.date()) self.assertEqual(hpat_func(A), test_impl(A)) @@ -135,20 +135,20 @@ def test_date_series_unbox2(self): def test_impl(A): return A[0] - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df = self._gen_str_date_df() A = pd.DatetimeIndex(df['str_date']).map(lambda x: x.date()) self.assertEqual(hpat_func(A), test_impl(A)) def test_datetime_index_set(self): def test_impl(df): - df['hpat'] = pd.DatetimeIndex(df['str_date']).values + df['sdc'] = pd.DatetimeIndex(df['str_date']).values - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df = self._gen_str_date_df() hpat_func(df) df['std'] = pd.DatetimeIndex(df['str_date']) - allequal = (df['std'].equals(df['hpat'])) + 
allequal = (df['std'].equals(df['sdc'])) self.assertTrue(allequal) def test_timestamp(self): @@ -157,14 +157,14 @@ def test_impl(): ts = pd.Timestamp(dt) return ts.day + ts.hour + ts.microsecond + ts.month + ts.nanosecond + ts.second + ts.year - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) self.assertEqual(hpat_func(), test_impl()) def test_extract(self): def test_impl(s): return s.month - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) ts = pd.Timestamp(datetime(2017, 4, 26).isoformat()) month = hpat_func(ts) self.assertEqual(month, 4) @@ -173,7 +173,7 @@ def test_timestamp_date(self): def test_impl(s): return s.date() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) ts = pd.Timestamp(datetime(2017, 4, 26).isoformat()) self.assertEqual(hpat_func(ts), test_impl(ts)) @@ -182,7 +182,7 @@ def test_impl(df): return (df.A >= '2011-10-23').values df = pd.DataFrame({'A': pd.DatetimeIndex(['2015-01-03', '2010-10-11'])}) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) np.testing.assert_array_equal(hpat_func(df), test_impl(df)) def test_datetimeindex_str_comp2(self): @@ -190,7 +190,7 @@ def test_impl(df): return ('2011-10-23' <= df.A).values df = pd.DataFrame({'A': pd.DatetimeIndex(['2015-01-03', '2010-10-11'])}) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) np.testing.assert_array_equal(hpat_func(df), test_impl(df)) def test_datetime_index_df(self): @@ -198,7 +198,7 @@ def test_impl(df): df = pd.DataFrame({'A': pd.DatetimeIndex(df['str_date'])}) return df.A - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df = self._gen_str_date_df() np.testing.assert_array_equal(hpat_func(df), test_impl(df)) @@ -206,7 +206,7 @@ def test_datetime_index_date(self): def test_impl(df): return pd.DatetimeIndex(df['str_date']).date - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df = self._gen_str_date_df() np.testing.assert_array_equal(hpat_func(df), test_impl(df)) 
@@ -214,7 +214,7 @@ def test_datetime_index_max(self): def test_impl(df): return pd.DatetimeIndex(df['str_date']).max() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df = self._gen_str_date_df() self.assertEqual(hpat_func(df), test_impl(df)) @@ -222,7 +222,7 @@ def test_datetime_index_min(self): def test_impl(df): return pd.DatetimeIndex(df['str_date']).min() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df = self._gen_str_date_df() self.assertEqual(hpat_func(df), test_impl(df)) @@ -232,7 +232,7 @@ def test_impl(df): t = s - s.min() return t.days - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df = self._gen_str_date_df() np.testing.assert_array_equal(hpat_func(df), test_impl(df)) @@ -242,7 +242,7 @@ def test_impl(df): t = s - s.min() return t.seconds - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df = self._gen_str_date_df() np.testing.assert_array_equal(hpat_func(df), test_impl(df)) @@ -252,7 +252,7 @@ def test_impl(df): t = s - s.min() return t.microseconds - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df = self._gen_str_date_df() np.testing.assert_array_equal(hpat_func(df), test_impl(df)) @@ -262,7 +262,7 @@ def test_impl(df): t = s - s.min() return t.nanoseconds - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df = self._gen_str_date_df() np.testing.assert_array_equal(hpat_func(df), test_impl(df)) @@ -270,7 +270,7 @@ def test_datetime_index_ret(self): def test_impl(df): return pd.DatetimeIndex(df['str_date']) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df = self._gen_str_date_df() pd.testing.assert_index_equal(hpat_func(df), test_impl(df), check_names=False) @@ -279,7 +279,7 @@ def test_datetime_index_year(self): def test_impl(df): return pd.DatetimeIndex(df['str_date']).year - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df = self._gen_str_date_df() np.testing.assert_array_equal(hpat_func(df), 
test_impl(df)) @@ -287,7 +287,7 @@ def test_datetime_index_month(self): def test_impl(df): return pd.DatetimeIndex(df['str_date']).month - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df = self._gen_str_date_df() np.testing.assert_array_equal(hpat_func(df), test_impl(df)) @@ -295,7 +295,7 @@ def test_datetime_index_day(self): def test_impl(df): return pd.DatetimeIndex(df['str_date']).day - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df = self._gen_str_date_df() np.testing.assert_array_equal(hpat_func(df), test_impl(df)) @@ -303,7 +303,7 @@ def test_datetime_index_hour(self): def test_impl(df): return pd.DatetimeIndex(df['str_date']).hour - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df = self._gen_str_date_df() np.testing.assert_array_equal(hpat_func(df), test_impl(df)) @@ -311,7 +311,7 @@ def test_datetime_index_minute(self): def test_impl(df): return pd.DatetimeIndex(df['str_date']).minute - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df = self._gen_str_date_df() np.testing.assert_array_equal(hpat_func(df), test_impl(df)) @@ -319,7 +319,7 @@ def test_datetime_index_second(self): def test_impl(df): return pd.DatetimeIndex(df['str_date']).second - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df = self._gen_str_date_df() np.testing.assert_array_equal(hpat_func(df), test_impl(df)) @@ -327,7 +327,7 @@ def test_datetime_index_microsecond(self): def test_impl(df): return pd.DatetimeIndex(df['str_date']).microsecond - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df = self._gen_str_date_df() np.testing.assert_array_equal(hpat_func(df), test_impl(df)) @@ -335,7 +335,7 @@ def test_datetime_index_nanosecond(self): def test_impl(df): return pd.DatetimeIndex(df['str_date']).nanosecond - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df = self._gen_str_date_df() np.testing.assert_array_equal(hpat_func(df), test_impl(df)) @@ -343,7 +343,7 
@@ def test_datetime_series_dt_date(self): def test_impl(A): return A.dt.date - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df = self._gen_str_date_df() A = pd.DatetimeIndex(df['str_date']).to_series() # TODO: fix index and name @@ -355,7 +355,7 @@ def test_datetime_series_dt_year(self): def test_impl(A): return A.dt.year - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df = self._gen_str_date_df() A = pd.DatetimeIndex(df['str_date']).to_series() # TODO: fix index and name diff --git a/hpat/tests/test_groupby.py b/sdc/tests/test_groupby.py similarity index 90% rename from hpat/tests/test_groupby.py rename to sdc/tests/test_groupby.py index 91bb745b3..aa3bd0733 100644 --- a/hpat/tests/test_groupby.py +++ b/sdc/tests/test_groupby.py @@ -30,8 +30,8 @@ import numpy as np import pyarrow.parquet as pq import numba -import hpat -from hpat.tests.test_utils import (count_array_REPs, count_parfor_REPs, +import sdc +from sdc.tests.test_utils import (count_array_REPs, count_parfor_REPs, count_parfor_OneDs, count_array_OneDs, dist_IR_contains, get_start_end) @@ -52,7 +52,7 @@ def test_impl(df): A = df.groupby('A')['B'].agg(lambda x: x.max() - x.min()) return A.values - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df = pd.DataFrame({'A': [2, 1, 1, 1, 2, 2, 1], 'B': [-8, 2, 3, 1, 5, 6, 7]}) # np.testing.assert_array_equal(hpat_func(df), test_impl(df)) self.assertEqual(set(hpat_func(df)), set(test_impl(df))) @@ -62,7 +62,7 @@ def test_impl(df): A = df.groupby('A')['B'].sum() return A.values - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df = pd.DataFrame({'A': [2, 1, 1, 1, 2, 2, 1], 'B': [-8, 2, 3, 1, 5, 6, 7]}) self.assertEqual(set(hpat_func(df)), set(test_impl(df))) @@ -71,7 +71,7 @@ def test_impl(df): A = df.groupby('A')['B'].count() return A.values - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df = pd.DataFrame({'A': [2, 1, 1, 1, 2, 2, 1], 'B': [-8, 2, 3, 1, 5, 6, 7]}) 
self.assertEqual(set(hpat_func(df)), set(test_impl(df))) @@ -80,7 +80,7 @@ def test_impl(df): A = df.groupby('A')['B'].mean() return A.values - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df = pd.DataFrame({'A': [2, 1, 1, 1, 2, 2, 1], 'B': [-8, 2, 3, 1, 5, 6, 7]}) self.assertEqual(set(hpat_func(df)), set(test_impl(df))) @@ -89,7 +89,7 @@ def test_impl(df): A = df.groupby('A')['B'].min() return A.values - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df = pd.DataFrame({'A': [2, 1, 1, 1, 2, 2, 1], 'B': [-8, 2, 3, 1, 5, 6, 7]}) self.assertEqual(set(hpat_func(df)), set(test_impl(df))) @@ -98,7 +98,7 @@ def test_impl(df): df2 = df.groupby('A', as_index=False).min() return df2 - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df = pd.DataFrame({'A': [2, 1, 1, 1, 2, 2, 1], 'B': pd.date_range('2019-1-3', '2019-1-9')}) self.assertEqual(set(hpat_func(df)), set(test_impl(df))) @@ -107,7 +107,7 @@ def test_impl(df): A = df.groupby('A')['B'].max() return A.values - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df = pd.DataFrame({'A': [2, 1, 1, 1, 2, 2, 1], 'B': [-8, 2, 3, 1, 5, 6, 7]}) self.assertEqual(set(hpat_func(df)), set(test_impl(df))) @@ -116,7 +116,7 @@ def test_impl(df): df2 = df.groupby('A').mean() return df2.B.values - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df = pd.DataFrame({'A': [2, 1, 1, 1, 2, 2, 1], 'B': [-8, 2, 3, 1, 5, 6, 7]}) self.assertEqual(set(hpat_func(df)), set(test_impl(df))) @@ -125,7 +125,7 @@ def test_impl(df): df2 = df.groupby('A', as_index=False).mean() return df2.A.values - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df = pd.DataFrame({'A': [2, 1, 1, 1, 2, 2, 1], 'B': [-8, 2, 3, 1, 5, 6, 7]}) self.assertEqual(set(hpat_func(df)), set(test_impl(df))) @@ -134,7 +134,7 @@ def test_impl(df): A = df.groupby('A')['B'].prod() return A.values - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df = pd.DataFrame({'A': [2, 
1, 1, 1, 2, 2, 1], 'B': [-8, 2, 3, 1, 5, 6, 7]}) self.assertEqual(set(hpat_func(df)), set(test_impl(df))) @@ -143,7 +143,7 @@ def test_impl(df): A = df.groupby('A')['B'].var() return A.values - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df = pd.DataFrame({'A': [2, 1, 1, 1, 2, 2, 1], 'B': [-8, 2, 3, 1, 5, 6, 7]}) self.assertEqual(set(hpat_func(df)), set(test_impl(df))) @@ -152,7 +152,7 @@ def test_impl(df): A = df.groupby('A')['B'].std() return A.values - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df = pd.DataFrame({'A': [2, 1, 1, 1, 2, 2, 1], 'B': [-8, 2, 3, 1, 5, 6, 7]}) self.assertEqual(set(hpat_func(df)), set(test_impl(df))) @@ -161,7 +161,7 @@ def test_impl(df): df2 = df.groupby('A')['B', 'C'].sum() return df2.C.values - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df = pd.DataFrame({'A': [2, 1, 1, 1, 2, 2, 1], 'B': [-8, 2, 3, 1, 5, 6, 7], 'C': [3, 5, 6, 5, 4, 4, 3]}) self.assertEqual(set(hpat_func(df)), set(test_impl(df))) @@ -171,7 +171,7 @@ def test_impl(df): A = df.groupby(['A', 'C'])['B'].sum() return A.values - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df = pd.DataFrame({'A': [2, 1, 1, 1, 2, 2, 1], 'B': [-8, 2, 3, 1, 5, 6, 7], 'C': [3, 5, 6, 5, 4, 4, 3]}) self.assertEqual(set(hpat_func(df)), set(test_impl(df))) @@ -182,7 +182,7 @@ def test_impl(in_A, in_B, in_C): A = df.groupby(['A', 'C'])['B'].sum() return A.sum() - hpat_func = hpat.jit(locals={'in_A:input': 'distributed', + hpat_func = sdc.jit(locals={'in_A:input': 'distributed', 'in_B:input': 'distributed', 'in_C:input': 'distributed'})(test_impl) df = pd.DataFrame({'A': [2, 1, 1, 1, 2, 2, 1], 'B': [-8, 2, 3, 1, 5, 6, 7], @@ -204,7 +204,7 @@ def test_impl(n): A = df.groupby('A')['B'].agg(lambda x: x.max() - x.min()) return A.sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 self.assertEqual(hpat_func(n), test_impl(n)) self.assertEqual(count_array_REPs(), 0) @@ -216,7 +216,7 @@ def 
test_impl(n): A = df.groupby('A')['B'].sum() return A.sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 self.assertEqual(hpat_func(n), test_impl(n)) self.assertEqual(count_array_REPs(), 0) @@ -228,7 +228,7 @@ def test_impl(n): A = df.groupby('A')['B'].count() return A.sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 self.assertEqual(hpat_func(n), test_impl(n)) self.assertEqual(count_array_REPs(), 0) @@ -240,7 +240,7 @@ def test_impl(n): A = df.groupby('A')['B'].mean() return A.sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 self.assertEqual(hpat_func(n), test_impl(n)) self.assertEqual(count_array_REPs(), 0) @@ -252,7 +252,7 @@ def test_impl(n): A = df.groupby('A')['B'].min() return A.sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 self.assertEqual(hpat_func(n), test_impl(n)) self.assertEqual(count_array_REPs(), 0) @@ -264,7 +264,7 @@ def test_impl(n): A = df.groupby('A')['B'].max() return A.sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 self.assertEqual(hpat_func(n), test_impl(n)) self.assertEqual(count_array_REPs(), 0) @@ -276,7 +276,7 @@ def test_impl(n): A = df.groupby('A')['B'].var() return A.sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 self.assertEqual(hpat_func(n), test_impl(n)) self.assertEqual(count_array_REPs(), 0) @@ -288,7 +288,7 @@ def test_impl(n): A = df.groupby('A')['B'].std() return A.sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 self.assertEqual(hpat_func(n), test_impl(n)) self.assertEqual(count_array_REPs(), 0) @@ -302,7 +302,7 @@ def test_impl(): A = df.groupby('A')['B'].agg(lambda x: x.max() - x.min()) return A.sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) self.assertEqual(hpat_func(), test_impl()) self.assertEqual(count_array_REPs(), 0) self.assertEqual(count_parfor_REPs(), 0) @@ -313,7 +313,7 
@@ def test_impl(n): df2 = df.groupby('A').max() return df2.B.sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 self.assertEqual(hpat_func(n), test_impl(n)) self.assertEqual(count_array_REPs(), 0) @@ -325,7 +325,7 @@ def test_impl(n): df2 = df.groupby('A', as_index=False).max() return df2.A.sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 self.assertEqual(hpat_func(n), test_impl(n)) self.assertEqual(count_array_REPs(), 0) @@ -337,7 +337,7 @@ def test_impl(df, cond): c = df2.groupby('A')['B'].count() return df2.C, c - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df = pd.DataFrame({'A': [2, 1, 1, 1, 2, 2, 1], 'B': [-8, 2, 3, 1, 5, 6, 7], 'C': [2, 3, -1, 1, 2, 3, -1]}) cond = df.A > 1 res = test_impl(df, cond) @@ -350,7 +350,7 @@ def test_impl(df): A = df.groupby('A')['B'].agg(lambda x: (x == 'aa').sum()) return A.values - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df = pd.DataFrame({'A': ['aa', 'b', 'b', 'b', 'aa', 'aa', 'b'], 'B': ['ccc', 'a', 'bb', 'aa', 'dd', 'ggg', 'rr']}) # np.testing.assert_array_equal(hpat_func(df), test_impl(df)) @@ -361,7 +361,7 @@ def test_impl(df): A = df.groupby('A')['B'].count() return A.values - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df = pd.DataFrame({'A': ['aa', 'b', 'b', 'b', 'aa', 'aa', 'b'], 'B': ['ccc', 'a', 'bb', 'aa', 'dd', 'ggg', 'rr']}) # np.testing.assert_array_equal(hpat_func(df), test_impl(df)) @@ -372,7 +372,7 @@ def test_impl(df): pt = df.pivot_table(index='A', columns='C', values='D', aggfunc='sum') return (pt.small.values, pt.large.values) - hpat_func = hpat.jit(pivots={'pt': ['small', 'large']})(test_impl) + hpat_func = sdc.jit(pivots={'pt': ['small', 'large']})(test_impl) self.assertEqual( set(hpat_func(_pivot_df1)[0]), set(test_impl(_pivot_df1)[0])) self.assertEqual( @@ -385,7 +385,7 @@ def test_impl(): res = pt.small.values.sum() return res - hpat_func = hpat.jit( + hpat_func = sdc.jit( 
pivots={'pt': ['small', 'large']})(test_impl) self.assertEqual(hpat_func(), test_impl()) @@ -394,7 +394,7 @@ def test_impl(df): pt = pd.crosstab(df.A, df.C) return (pt.small.values, pt.large.values) - hpat_func = hpat.jit(pivots={'pt': ['small', 'large']})(test_impl) + hpat_func = sdc.jit(pivots={'pt': ['small', 'large']})(test_impl) self.assertEqual( set(hpat_func(_pivot_df1)[0]), set(test_impl(_pivot_df1)[0])) self.assertEqual( @@ -407,7 +407,7 @@ def test_impl(): res = pt.small.values.sum() return res - hpat_func = hpat.jit( + hpat_func = sdc.jit( pivots={'pt': ['small', 'large']})(test_impl) self.assertEqual(hpat_func(), test_impl()) @@ -418,7 +418,7 @@ def test_impl(df): return group.count() df = pd.DataFrame({'A': [2, 1, 1, 1, 2, 2, 1], 'B': [-8, 2, 3, 1, 5, 6, 7]}) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) pd.testing.assert_frame_equal(hpat_func(df), test_impl(df)) diff --git a/hpat/tests/test_hiframes.py b/sdc/tests/test_hiframes.py similarity index 89% rename from hpat/tests/test_hiframes.py rename to sdc/tests/test_hiframes.py index 1f70c9243..b413e65f3 100644 --- a/hpat/tests/test_hiframes.py +++ b/sdc/tests/test_hiframes.py @@ -33,11 +33,11 @@ import string import pyarrow.parquet as pq import numba -import hpat +import sdc import os -from hpat import hiframes -from hpat.str_arr_ext import StringArray -from hpat.tests.test_utils import (count_array_REPs, count_parfor_REPs, +from sdc import hiframes +from sdc.str_arr_ext import StringArray +from sdc.tests.test_utils import (count_array_REPs, count_parfor_REPs, count_parfor_OneDs, count_array_OneDs, dist_IR_contains, get_start_end) @@ -51,7 +51,7 @@ def test_impl(df): df2['A'] += 10 return df2.A, df.A - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 df = pd.DataFrame( {'A': np.arange(n), 'B': np.ones(n), 'C': np.random.ranf(n)}) @@ -64,7 +64,7 @@ def test_impl(n): df = pd.DataFrame({'A': S1, 'B': S2}) return df.A.sum() - hpat_func = hpat.jit(test_impl) + 
hpat_func = sdc.jit(test_impl) n = 11 self.assertEqual(hpat_func(n), test_impl(n)) self.assertEqual(count_array_REPs(), 0) @@ -75,7 +75,7 @@ def test_getitem_bool_series(self): def test_impl(df): return df['A'][df['B']].values - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df = pd.DataFrame({'A': [1, 2, 3], 'B': [True, False, True]}) np.testing.assert_array_equal(test_impl(df), hpat_func(df)) @@ -87,7 +87,7 @@ def test_impl(): B = df.A.fillna(5.0) return B.sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) self.assertEqual(hpat_func(), test_impl()) def test_fillna_inplace(self): @@ -98,7 +98,7 @@ def test_impl(): df.A.fillna(5.0, inplace=True) return df.A.sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) self.assertEqual(hpat_func(), test_impl()) def test_column_mean(self): @@ -108,7 +108,7 @@ def test_impl(): df = pd.DataFrame({'A': A}) return df.A.mean() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) self.assertEqual(hpat_func(), test_impl()) def test_column_var(self): @@ -118,7 +118,7 @@ def test_impl(): df = pd.DataFrame({'A': A}) return df.A.var() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) np.testing.assert_almost_equal(hpat_func(), test_impl()) def test_column_std(self): @@ -128,7 +128,7 @@ def test_impl(): df = pd.DataFrame({'A': A}) return df.A.std() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) np.testing.assert_almost_equal(hpat_func(), test_impl()) def test_column_map(self): @@ -138,7 +138,7 @@ def test_impl(n): return df.B.sum() n = 121 - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) np.testing.assert_almost_equal(hpat_func(n), test_impl(n)) def test_column_map_arg(self): @@ -149,7 +149,7 @@ def test_impl(df): n = 121 df1 = pd.DataFrame({'A': np.arange(n)}) df2 = pd.DataFrame({'A': np.arange(n)}) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) hpat_func(df1) self.assertTrue(hasattr(df1, 
'B')) test_impl(df2) @@ -161,7 +161,7 @@ def test_impl(n): Ac = df.A.cumsum() return Ac.sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 self.assertEqual(hpat_func(n), test_impl(n)) self.assertEqual(count_array_REPs(), 0) @@ -183,7 +183,7 @@ def test_impl(n): Ac = df.A.cumsum() return Ac.sum() + s + m + v + t - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 self.assertEqual(hpat_func(n), test_impl(n)) self.assertEqual(count_array_REPs(), 0) @@ -195,7 +195,7 @@ def test_impl(n): df = pd.DataFrame({'A': np.arange(0, n, 1, np.float64)}) return df.A.quantile(.25) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 1001 np.testing.assert_almost_equal(hpat_func(n), test_impl(n)) self.assertEqual(count_array_REPs(), 0) @@ -210,7 +210,7 @@ def test_impl(n): df.A[200:331] = np.nan return df.A.quantile(.25) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 1001 np.testing.assert_almost_equal(hpat_func(n), test_impl(n)) self.assertEqual(count_array_REPs(), 0) @@ -223,7 +223,7 @@ def test_impl(n): df = pd.DataFrame({'A': np.arange(0, n, 1, np.int32)}) return df.A.quantile(.25) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 1001 np.testing.assert_almost_equal(hpat_func(n), test_impl(n)) self.assertEqual(count_array_REPs(), 0) @@ -236,7 +236,7 @@ def test_impl(A): df = pd.DataFrame({'A': A}) return df.A.quantile(.25) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 1001 A = np.arange(0, n, 1, np.float64) np.testing.assert_almost_equal(hpat_func(A), test_impl(A)) @@ -247,11 +247,11 @@ def test_impl(n): df.A[2] = 0 return df.A.nunique() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 1001 np.testing.assert_almost_equal(hpat_func(n), test_impl(n)) # test compile again for overload related issues - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) np.testing.assert_almost_equal(hpat_func(n), test_impl(n)) 
def test_nunique_parallel(self): @@ -260,11 +260,11 @@ def test_impl(): df = pq.read_table('example.parquet').to_pandas() return df.four.nunique() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) self.assertEqual(hpat_func(), test_impl()) self.assertEqual(count_array_REPs(), 0) # test compile again for overload related issues - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) self.assertEqual(hpat_func(), test_impl()) self.assertEqual(count_array_REPs(), 0) @@ -273,11 +273,11 @@ def test_impl(n): df = pd.DataFrame({'A': ['aa', 'bb', 'aa', 'cc', 'cc']}) return df.A.nunique() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 1001 np.testing.assert_almost_equal(hpat_func(n), test_impl(n)) # test compile again for overload related issues - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) np.testing.assert_almost_equal(hpat_func(n), test_impl(n)) @unittest.skip('AssertionError - fix needed\n' @@ -288,11 +288,11 @@ def test_impl(): df = pq.read_table('example.parquet').to_pandas() return df.two.nunique() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) self.assertEqual(hpat_func(), test_impl()) self.assertEqual(count_array_REPs(), 0) # test compile again for overload related issues - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) self.assertEqual(hpat_func(), test_impl()) self.assertEqual(count_array_REPs(), 0) @@ -302,7 +302,7 @@ def test_impl(): df = pq.read_table('example.parquet').to_pandas() return (df.four.unique() == 3.0).sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) self.assertEqual(hpat_func(), test_impl()) self.assertEqual(count_array_REPs(), 0) @@ -314,7 +314,7 @@ def test_impl(): df = pq.read_table('example.parquet').to_pandas() return (df.two.unique() == 'foo').sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) self.assertEqual(hpat_func(), test_impl()) self.assertEqual(count_array_REPs(), 0) @@ -323,7 
+323,7 @@ def test_impl(n): df = pd.DataFrame({'A': np.arange(0, n, 1, np.float64)}) return df.A.describe() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 1001 hpat_func(n) # XXX: test actual output @@ -337,7 +337,7 @@ def test_impl(): B = df.A.str.contains('AB*', regex=True) return B.sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) self.assertEqual(hpat_func(), 2) def test_str_contains_noregex(self): @@ -347,7 +347,7 @@ def test_impl(): B = df.A.str.contains('BB', regex=False) return B.sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) self.assertEqual(hpat_func(), 1) def test_str_replace_regex(self): @@ -355,7 +355,7 @@ def test_impl(df): return df.A.str.replace('AB*', 'EE', regex=True) df = pd.DataFrame({'A': ['ABCC', 'CABBD']}) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) pd.testing.assert_series_equal( hpat_func(df), test_impl(df), check_names=False) @@ -364,7 +364,7 @@ def test_impl(df): return df.A.str.replace('AB', 'EE', regex=False) df = pd.DataFrame({'A': ['ABCC', 'CABBD']}) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) pd.testing.assert_series_equal( hpat_func(df), test_impl(df), check_names=False) @@ -377,7 +377,7 @@ def test_impl(df): A = ['ABCC', 'CABBD', 'CCD', 'CCDAABB', 'ED'] start, end = get_start_end(n) df = pd.DataFrame({'A': A[start:end]}) - hpat_func = hpat.jit(distributed={'df', 'B'})(test_impl) + hpat_func = sdc.jit(distributed={'df', 'B'})(test_impl) pd.testing.assert_series_equal( hpat_func(df), test_impl(df), check_names=False) self.assertEqual(count_array_REPs(), 3) @@ -388,7 +388,7 @@ def test_impl(df): return df.A.str.split(',') df = pd.DataFrame({'A': ['AB,CC', 'C,ABB,D', 'G', '', 'g,f']}) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) pd.testing.assert_series_equal( hpat_func(df), test_impl(df), check_names=False) @@ -397,7 +397,7 @@ def test_impl(df): return df.A.str.split() df = pd.DataFrame({'A': 
['AB CC', 'C ABB D', 'G ', ' ', 'g\t f']}) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) pd.testing.assert_series_equal( hpat_func(df), test_impl(df), check_names=False) @@ -408,7 +408,7 @@ def test_impl(df): return df2[df2.B.str.len() > 1] df = pd.DataFrame({'A': ['AB,CC', 'C,ABB,D', 'G', '', 'g,f']}) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) pd.testing.assert_frame_equal( hpat_func(df), test_impl(df).reset_index(drop=True)) @@ -417,7 +417,7 @@ def test_impl(df): return pd.DataFrame({'B': df.A.str.split(',')}) df = pd.DataFrame({'A': ['AB,CC', 'C,ABB,D']}) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) pd.testing.assert_series_equal( hpat_func(df).B, test_impl(df).B, check_names=False) @@ -427,7 +427,7 @@ def test_impl(df): df = pd.DataFrame({'A': ['AB,CC', 'C,ABB,D']}) df2 = pd.DataFrame({'A': df.A.str.split(',')}) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) self.assertEqual(hpat_func(df2), test_impl(df2)) def test_str_split_bool_index(self): @@ -436,7 +436,7 @@ def test_impl(df): return C[df.B == 'aa'] df = pd.DataFrame({'A': ['AB,CC', 'C,ABB,D'], 'B': ['aa', 'bb']}) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) pd.testing.assert_series_equal( hpat_func(df), test_impl(df), check_names=False) @@ -449,7 +449,7 @@ def test_impl(df): start, end = get_start_end(n) A = ['AB,CC', 'C,ABB,D', 'CAD', 'CA,D', 'AA,,D'] df = pd.DataFrame({'A': A[start:end]}) - hpat_func = hpat.jit(distributed={'df', 'B'})(test_impl) + hpat_func = sdc.jit(distributed={'df', 'B'})(test_impl) pd.testing.assert_series_equal( hpat_func(df), test_impl(df), check_names=False) self.assertEqual(count_array_REPs(), 3) @@ -461,7 +461,7 @@ def test_impl(df): return B.str.get(1) df = pd.DataFrame({'A': ['AB,CC', 'C,ABB,D']}) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) pd.testing.assert_series_equal( hpat_func(df), test_impl(df), check_names=False) @@ -470,7 +470,7 @@ def 
test_impl(df): return df.A.str.split(',') df = pd.DataFrame({'A': ['AB,CC', 'C,ABB,D']}) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) pd.testing.assert_series_equal(hpat_func(df), test_impl(df), check_names=False) def test_str_get_parallel(self): @@ -483,7 +483,7 @@ def test_impl(df): start, end = get_start_end(n) A = ['AB,CC', 'C,ABB,D', 'CAD,F', 'CA,D', 'AA,,D'] df = pd.DataFrame({'A': A[start:end]}) - hpat_func = hpat.jit(distributed={'df', 'B'})(test_impl) + hpat_func = sdc.jit(distributed={'df', 'B'})(test_impl) pd.testing.assert_series_equal( hpat_func(df), test_impl(df), check_names=False) self.assertEqual(count_array_REPs(), 3) @@ -496,7 +496,7 @@ def test_impl(df): return C df = pd.DataFrame({'A': ['AB,12', 'C,321,D']}) - hpat_func = hpat.jit(locals={'C': hpat.int64[:]})(test_impl) + hpat_func = sdc.jit(locals={'C': sdc.int64[:]})(test_impl) pd.testing.assert_series_equal( hpat_func(df), test_impl(df), check_names=False) @@ -506,7 +506,7 @@ def test_impl(df): return pd.Series(list(itertools.chain(*A))) df = pd.DataFrame({'A': ['AB,CC', 'C,ABB,D']}) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) pd.testing.assert_series_equal( hpat_func(df), test_impl(df), check_names=False) @@ -520,7 +520,7 @@ def test_impl(df): start, end = get_start_end(n) A = ['AB,CC', 'C,ABB,D', 'CAD', 'CA,D', 'AA,,D'] df = pd.DataFrame({'A': A[start:end]}) - hpat_func = hpat.jit(distributed={'df', 'B'})(test_impl) + hpat_func = sdc.jit(distributed={'df', 'B'})(test_impl) pd.testing.assert_series_equal( hpat_func(df), test_impl(df), check_names=False) self.assertEqual(count_array_REPs(), 3) @@ -532,7 +532,7 @@ def test_impl(df): return B df = pd.DataFrame({'A': ['123.1', '331.2']}) - hpat_func = hpat.jit(locals={'B': hpat.float64[:]})(test_impl) + hpat_func = sdc.jit(locals={'B': sdc.float64[:]})(test_impl) pd.testing.assert_series_equal( hpat_func(df), test_impl(df), check_names=False) @@ -542,7 +542,7 @@ def test_impl(n): df1 = df[df.A > 5] 
return len(df1.B) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 self.assertEqual(hpat_func(n), test_impl(n)) self.assertEqual(count_array_REPs(), 0) @@ -555,7 +555,7 @@ def test_impl(n): Ac = df.A.rolling(3).sum() return Ac.sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 121 self.assertEqual(hpat_func(n), test_impl(n)) self.assertEqual(count_array_REPs(), 0) @@ -567,7 +567,7 @@ def test_impl_2(n): Ac = df.A.rolling(7).sum() return Ac.sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 121 self.assertEqual(hpat_func(n), test_impl(n)) self.assertEqual(count_array_REPs(), 0) @@ -579,7 +579,7 @@ def test_impl(n): df['moving average'] = df.A.rolling(window=5, center=True).mean() return df['moving average'].sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 121 self.assertEqual(hpat_func(n), test_impl(n)) self.assertEqual(count_array_REPs(), 0) @@ -591,7 +591,7 @@ def test_impl(n): Ac = df.A.rolling(3, center=True).apply(lambda a: a[0] + 2 * a[1] + a[2]) return Ac.sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 121 self.assertEqual(hpat_func(n), test_impl(n)) self.assertEqual(count_array_REPs(), 0) @@ -605,7 +605,7 @@ def test_impl(n): Ac = df.A.shift(1) return Ac.sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 self.assertEqual(hpat_func(n), test_impl(n)) self.assertEqual(count_array_REPs(), 0) @@ -619,7 +619,7 @@ def test_impl(n): Ac = df.A.pct_change(1) return Ac.sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 np.testing.assert_almost_equal(hpat_func(n), test_impl(n)) self.assertEqual(count_array_REPs(), 0) @@ -631,7 +631,7 @@ def test_impl(df): n = 121 df = pd.DataFrame({'A': np.ones(n), 'B': np.random.ranf(n)}) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) np.testing.assert_almost_equal(hpat_func(df), test_impl(df)) def test_df_input2(self): @@ 
-641,7 +641,7 @@ def test_impl(df): n = 11 df = pd.DataFrame({'A': np.random.ranf(3 * n), 'B': ['one', 'two', 'three'] * n}) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) np.testing.assert_almost_equal(hpat_func(df), test_impl(df)) def test_df_input_dist1(self): @@ -655,7 +655,7 @@ def test_impl(df): start, end = get_start_end(n) df = pd.DataFrame({'A': A, 'B': B}) df_h = pd.DataFrame({'A': A[start:end], 'B': B[start:end]}) - hpat_func = hpat.jit(distributed={'df'})(test_impl) + hpat_func = sdc.jit(distributed={'df'})(test_impl) np.testing.assert_almost_equal(hpat_func(df_h), test_impl(df)) self.assertEqual(count_array_REPs(), 0) self.assertEqual(count_parfor_REPs(), 0) @@ -667,7 +667,7 @@ def test_impl(n): df3 = pd.concat([df1, df2]) return df3.A.sum() + df3.key2.sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 self.assertEqual(hpat_func(n), test_impl(n)) self.assertEqual(count_array_REPs(), 0) @@ -682,7 +682,7 @@ def test_impl(): A3 = pd.concat([df1, df2]) return (A3.two == 'foo').sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) self.assertEqual(hpat_func(), test_impl()) self.assertEqual(count_array_REPs(), 0) self.assertEqual(count_parfor_REPs(), 0) @@ -694,7 +694,7 @@ def test_impl(n): A3 = pd.concat([df1.A, df2.A]) return A3.sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 self.assertEqual(hpat_func(n), test_impl(n)) self.assertEqual(count_array_REPs(), 0) @@ -709,7 +709,7 @@ def test_impl(): A3 = pd.concat([df1.two, df2.two]) return (A3 == 'foo').sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) self.assertEqual(hpat_func(), test_impl()) self.assertEqual(count_array_REPs(), 0) self.assertEqual(count_parfor_REPs(), 0) @@ -719,7 +719,7 @@ def test_intraday(self): def test_impl(nsyms): max_num_days = 100 all_res = 0.0 - for i in hpat.prange(nsyms): + for i in sdc.prange(nsyms): s_open = 20 * np.ones(max_num_days) s_low = 28 * 
np.ones(max_num_days) s_close = 19 * np.ones(max_num_days) @@ -734,7 +734,7 @@ def test_impl(nsyms): all_res += df['Rets'].mean() return all_res - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 self.assertEqual(hpat_func(n), test_impl(n)) self.assertEqual(count_array_OneDs(), 0) @@ -753,7 +753,7 @@ def test_impl(A, B): A = np.array([1, 1, 2, 3]) B = np.array([3, 4, 5, 6]) - hpat_func = hpat.jit(locals={'A:input': 'distributed', + hpat_func = sdc.jit(locals={'A:input': 'distributed', 'B:input': 'distributed', 'df2:return': 'distributed'})(test_impl) start, end = get_start_end(len(A)) df2 = hpat_func(A[start:end], B[start:end]) diff --git a/hpat/tests/test_hpat_jit.py b/sdc/tests/test_hpat_jit.py similarity index 95% rename from hpat/tests/test_hpat_jit.py rename to sdc/tests/test_hpat_jit.py index 1d164e7e1..63de6c7c7 100644 --- a/hpat/tests/test_hpat_jit.py +++ b/sdc/tests/test_hpat_jit.py @@ -27,11 +27,11 @@ import unittest import platform -import hpat +import sdc import numba import numpy as np import pandas as pd -from hpat import * +from sdc import * from numba.typed import Dict from collections import defaultdict @@ -177,7 +177,7 @@ def test_series_binop_int_casting(self): def test_impl(A): res = A + 42 return res.dtype - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) A = np.ones(1, dtype='int32') self.assertEqual(hpat_func(A), test_impl(A)) @@ -191,7 +191,7 @@ def test_impl(n): df = pd.DataFrame({'A': np.ones(n), 'B': np.arange(n)}) return df - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 pd.testing.assert_frame_equal(hpat_func(n), test_impl(n)) @@ -206,7 +206,7 @@ def test_impl(n): df['A'] = np.arange(n) return df - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 pd.testing.assert_frame_equal(hpat_func(n), test_impl(n)) @@ -219,7 +219,7 @@ def test_set_column_reflect4(self): def test_impl(df, n): df['A'] = np.arange(n) - hpat_func = hpat.jit(test_impl) + hpat_func = 
sdc.jit(test_impl) n = 11 df1 = pd.DataFrame({'A': np.ones(n, np.int64), 'B': np.arange(n) + 3.0}) df2 = df1.copy() @@ -238,7 +238,7 @@ def test_impl(n): df['A'] = np.arange(n) return df - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 pd.testing.assert_frame_equal(hpat_func(n), test_impl(n)) @@ -253,7 +253,7 @@ def test_impl(n): df['C'] = np.arange(n) return df - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 pd.testing.assert_frame_equal(hpat_func(n), test_impl(n)) @@ -266,7 +266,7 @@ def test_set_column_reflect3(self): def test_impl(df, n): df['C'] = np.arange(n) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 df1 = pd.DataFrame({'A': np.ones(n, np.int64), 'B': np.arange(n) + 3.0}) df2 = df1.copy() @@ -283,7 +283,7 @@ def test_series_op2_issue(self): for operator in arithmetic_binops: test_impl = _make_func_use_binop1(operator) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 df = pd.DataFrame({'A': np.arange(1, n)}) @@ -298,7 +298,7 @@ def test_series_op5_integer_scalar_issue(self): for method in arithmetic_methods: test_impl = _make_func_use_method_arg1(method) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 operand_series = pd.Series(np.arange(1, n)) @@ -315,7 +315,7 @@ def test_series_op5_integer_scalar_issue(self): def test_series_fusion1_issue(self): def test_impl(A, B): return A + B + 1 - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 A = pd.Series(np.arange(n)) @@ -334,7 +334,7 @@ def test_impl(A, B): if A[0] == 0: S = A + 1 return S + B - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 A = pd.Series(np.arange(n)) @@ -353,7 +353,7 @@ def test_impl(): names=['A', 'B', 'C', 'D'], dtype={'A': np.int, 'B': np.float, 'C': np.float, 'D': np.int}, ) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) pd.testing.assert_frame_equal(hpat_func(), test_impl()) 
@unittest.skipIf(platform.system() == 'Windows', @@ -368,7 +368,7 @@ def test_impl(): names=dtype.keys(), dtype=dtype, ) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) pd.testing.assert_frame_equal(hpat_func(), test_impl()) @unittest.skipIf(platform.system() == 'Windows', @@ -383,7 +383,7 @@ def test_impl(): names=dtype.keys(), dtype=dtype, ) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) pd.testing.assert_frame_equal(hpat_func(), test_impl()) @unittest.skipIf(platform.system() == 'Windows', @@ -398,7 +398,7 @@ def test_impl(): dtype={'A': np.int, 'B': np.float, 'C': np.float, 'D': np.int}, skiprows=2, ) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) pd.testing.assert_frame_equal(hpat_func(), test_impl()) @unittest.skipIf(platform.system() == 'Windows', @@ -412,7 +412,7 @@ def test_impl(): names=['A', 'B', 'C', 'D'], dtype={'A': np.int, 'B': np.float, 'C': str, 'D': np.int}, parse_dates=[2]) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) pd.testing.assert_frame_equal(hpat_func(), test_impl()) @unittest.skipIf(platform.system() == 'Windows', @@ -425,7 +425,7 @@ def test_impl(): return pd.read_csv("csv_data_date1.csv", names=['A', 'B', 'C', 'D'], dtype={'A': np.int, 'B': np.float, 'C': str, 'D': np.int}) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) pd.testing.assert_frame_equal(hpat_func(), test_impl()) diff --git a/hpat/tests/test_io.py b/sdc/tests/test_io.py similarity index 91% rename from hpat/tests/test_io.py rename to sdc/tests/test_io.py index 2aad6c9c8..0823ecefb 100644 --- a/hpat/tests/test_io.py +++ b/sdc/tests/test_io.py @@ -33,8 +33,8 @@ import numpy as np import h5py import pyarrow.parquet as pq -import hpat -from hpat.tests.test_utils import (count_array_REPs, count_parfor_REPs, +import sdc +from sdc.tests.test_utils import (count_array_REPs, count_parfor_REPs, count_parfor_OneDs, count_array_OneDs, dist_IR_contains, get_rank, get_start_end) from 
numba.config import IS_32BITS @@ -87,7 +87,7 @@ def test_impl(): f.close() return X - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) np.testing.assert_allclose(hpat_func(), test_impl()) @unittest.skip('Error - fix needed\n' @@ -101,7 +101,7 @@ def test_impl(): f.close() return X - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) np.testing.assert_allclose(hpat_func(), test_impl()) @unittest.skip('Error - fix needed\n' @@ -114,7 +114,7 @@ def test_impl(): f.close() return X.sum() + Y.sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) np.testing.assert_almost_equal(hpat_func(), test_impl(), decimal=2) self.assertEqual(count_array_REPs(), 0) self.assertEqual(count_parfor_REPs(), 0) @@ -133,7 +133,7 @@ def test_impl(N, D): N = 101 D = 10 - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) hpat_func(N, D) f = h5py.File("lr_w.hdf5", "r") X = f['points'][:] @@ -156,7 +156,7 @@ def test_impl(n, fname): n = 101 arr = np.arange(n) fname = "test_group.hdf5" - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) hpat_func(n, fname) f = h5py.File(fname, "r") X = f['G']['data'][:] @@ -173,7 +173,7 @@ def test_impl(): f.close() return X.sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) self.assertEqual(hpat_func(), test_impl()) @unittest.skip('Error - fix needed\n' @@ -188,10 +188,10 @@ def test_impl(): f.close() return s - hpat_func = hpat.jit(test_impl, h5_types={'X': hpat.int64[:]}) + hpat_func = sdc.jit(test_impl, h5_types={'X': sdc.int64[:]}) self.assertEqual(hpat_func(), test_impl()) # test using locals for typing - hpat_func = hpat.jit(test_impl, locals={'X': hpat.int64[:]}) + hpat_func = sdc.jit(test_impl, locals={'X': sdc.int64[:]}) self.assertEqual(hpat_func(), test_impl()) @unittest.skip('Error - fix needed\n' @@ -207,7 +207,7 @@ def test_impl(): f.close() return s - hpat_func = hpat.jit(test_impl, h5_types={'X': hpat.int64[:]}) + hpat_func = sdc.jit(test_impl, 
h5_types={'X': sdc.int64[:]}) self.assertEqual(hpat_func(), test_impl()) @unittest.skip('Error - fix needed\n' @@ -220,7 +220,7 @@ def test_impl(): f.close() return X - hpat_func = hpat.jit(locals={'X:return': 'distributed'})(test_impl) + hpat_func = sdc.jit(locals={'X:return': 'distributed'})(test_impl) n = 4 # len(test_impl()) start, end = get_start_end(n) np.testing.assert_allclose(hpat_func(), test_impl()[start:end]) @@ -234,7 +234,7 @@ def test_impl(): X = df['points'] return X.sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) np.testing.assert_almost_equal(hpat_func(), test_impl()) self.assertEqual(count_array_REPs(), 0) self.assertEqual(count_parfor_REPs(), 0) @@ -245,7 +245,7 @@ def test_impl(): X = df['points'] return X.sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) np.testing.assert_almost_equal(hpat_func(), test_impl()) self.assertEqual(count_array_REPs(), 0) self.assertEqual(count_parfor_REPs(), 0) @@ -260,7 +260,7 @@ def test_impl(): X = df['points'] return X.sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) np.testing.assert_almost_equal(hpat_func(), test_impl()) self.assertEqual(count_array_REPs(), 0) self.assertEqual(count_parfor_REPs(), 0) @@ -273,7 +273,7 @@ def test_impl(): X = df['points'] return X.sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) np.testing.assert_almost_equal(hpat_func(), test_impl()) self.assertEqual(count_array_REPs(), 0) self.assertEqual(count_parfor_REPs(), 0) @@ -286,7 +286,7 @@ def test_impl(): A = df.two.values == 'foo' return A.sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) np.testing.assert_almost_equal(hpat_func(), test_impl()) self.assertEqual(count_array_REPs(), 0) self.assertEqual(count_parfor_REPs(), 0) @@ -299,7 +299,7 @@ def test_impl(): A = df.five.values == 'foo' return A - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) np.testing.assert_almost_equal(hpat_func(), 
test_impl()) @unittest.skip('Error - fix needed\n' @@ -310,7 +310,7 @@ def test_impl(): A = df.five.values == 'foo' return A.sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) np.testing.assert_almost_equal(hpat_func(), test_impl()) self.assertEqual(count_array_REPs(), 0) self.assertEqual(count_parfor_REPs(), 0) @@ -321,7 +321,7 @@ def test_impl(): A = df.five.values == 'foo' return A.sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) np.testing.assert_almost_equal(hpat_func(), test_impl()) self.assertEqual(count_array_REPs(), 0) self.assertEqual(count_parfor_REPs(), 0) @@ -333,7 +333,7 @@ def test_impl(): df = pq.read_table('example.parquet').to_pandas() return df.three.sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) np.testing.assert_almost_equal(hpat_func(), test_impl()) self.assertEqual(count_array_REPs(), 0) self.assertEqual(count_parfor_REPs(), 0) @@ -345,7 +345,7 @@ def test_impl(): df = pq.read_table('example.parquet').to_pandas() return df.one.sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) np.testing.assert_almost_equal(hpat_func(), test_impl()) self.assertEqual(count_array_REPs(), 0) self.assertEqual(count_parfor_REPs(), 0) @@ -357,7 +357,7 @@ def test_impl(): df = pq.read_table('example.parquet').to_pandas() return df.four.sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) np.testing.assert_almost_equal(hpat_func(), test_impl()) self.assertEqual(count_array_REPs(), 0) self.assertEqual(count_parfor_REPs(), 0) @@ -367,7 +367,7 @@ def test_impl(): df = pd.read_parquet('pandas_dt.pq') return pd.DataFrame({'DT64': df.DT64, 'col2': df.DATE}) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) pd.testing.assert_frame_equal(hpat_func(), test_impl()) @unittest.skip('Error: Attribute "dtype" are different\n' @@ -378,7 +378,7 @@ def test_impl(): df = pd.read_parquet('sdf_dt.pq') return pd.DataFrame({'DT64': df.DT64, 'col2': df.DATE}) - 
hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) pd.testing.assert_frame_equal(hpat_func(), test_impl()) def test_csv1(self): @@ -396,7 +396,7 @@ def test_impl(): names=['A', 'B', 'C', 'D'], dtype={'A': np.int, 'B': np.float, 'C': np.float, 'D': np.int}, ) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) pd.testing.assert_frame_equal(hpat_func(), test_impl()) def test_csv_keys1(self): @@ -414,7 +414,7 @@ def test_impl(): names=dtype.keys(), dtype=dtype, ) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) pd.testing.assert_frame_equal(hpat_func(), test_impl()) def test_csv_const_dtype1(self): @@ -432,14 +432,14 @@ def test_impl(): names=dtype.keys(), dtype=dtype, ) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) pd.testing.assert_frame_equal(hpat_func(), test_impl()) def test_csv_infer1(self): def test_impl(): return pd.read_csv("csv_data_infer1.csv") - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) pd.testing.assert_frame_equal(hpat_func(), test_impl()) def test_csv_infer_parallel1(self): @@ -447,7 +447,7 @@ def test_impl(): df = pd.read_csv("csv_data_infer1.csv") return df.A.sum(), df.B.sum(), df.C.sum(), df.D.sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) self.assertEqual(hpat_func(), test_impl()) def test_csv_skip1(self): @@ -465,14 +465,14 @@ def test_impl(): dtype={'A': np.int, 'B': np.float, 'C': np.float, 'D': np.int}, skiprows=2, ) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) pd.testing.assert_frame_equal(hpat_func(), test_impl()) def test_csv_infer_skip1(self): def test_impl(): return pd.read_csv("csv_data_infer1.csv", skiprows=2) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) pd.testing.assert_frame_equal(hpat_func(), test_impl()) def test_csv_infer_skip_parallel1(self): @@ -481,7 +481,7 @@ def test_impl(): names=['A', 'B', 'C', 'D']) return df.A.sum(), df.B.sum(), df.C.sum(), df.D.sum() - hpat_func = 
hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) self.assertEqual(hpat_func(), test_impl()) def test_csv_rm_dead1(self): @@ -490,7 +490,7 @@ def test_impl(): names=['A', 'B', 'C', 'D'], dtype={'A': np.int, 'B': np.float, 'C': np.float, 'D': np.int},) return df.B.values - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) np.testing.assert_array_equal(hpat_func(), test_impl()) def test_csv_date1(self): @@ -506,7 +506,7 @@ def test_impl(): names=['A', 'B', 'C', 'D'], dtype={'A': np.int, 'B': np.float, 'C': str, 'D': np.int}, parse_dates=[2]) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) pd.testing.assert_frame_equal(hpat_func(), test_impl()) def test_csv_str1(self): @@ -520,7 +520,7 @@ def test_impl(): return pd.read_csv("csv_data_date1.csv", names=['A', 'B', 'C', 'D'], dtype={'A': np.int, 'B': np.float, 'C': str, 'D': np.int}) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) pd.testing.assert_frame_equal(hpat_func(), test_impl()) def test_csv_parallel1(self): @@ -529,7 +529,7 @@ def test_impl(): names=['A', 'B', 'C', 'D'], dtype={'A': np.int, 'B': np.float, 'C': np.float, 'D': np.int}) return (df.A.sum(), df.B.sum(), df.C.sum(), df.D.sum()) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) self.assertEqual(hpat_func(), test_impl()) def test_csv_str_parallel1(self): @@ -539,7 +539,7 @@ def test_impl(): dtype={'A': np.int, 'B': np.float, 'C': str, 'D': np.int}) return (df.A.sum(), df.B.sum(), (df.C == '1966-11-13').sum(), df.D.sum()) - hpat_func = hpat.jit(locals={'df:return': 'distributed'})(test_impl) + hpat_func = sdc.jit(locals={'df:return': 'distributed'})(test_impl) self.assertEqual(hpat_func(), test_impl()) def test_csv_usecols1(self): @@ -549,7 +549,7 @@ def test_impl(): dtype={'C': np.float}, usecols=[2], ) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) pd.testing.assert_frame_equal(hpat_func(), test_impl()) def test_csv_cat1(self): @@ -561,7 +561,7 @@ def 
test_impl(): dtype=dtypes, ) return df.C2 - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) pd.testing.assert_series_equal( hpat_func(), test_impl(), check_names=False) @@ -582,7 +582,7 @@ def test_impl(): dtype={'C1': np.int, 'C2': ct_dtype, 'C3': str}, ) return df - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) pd.testing.assert_frame_equal(hpat_func(), test_impl()) def test_csv_single_dtype1(self): @@ -592,7 +592,7 @@ def test_impl(): dtype=np.float64, ) return df - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) pd.testing.assert_frame_equal(hpat_func(), test_impl()) @unittest.skip('TypeError: to_csv() takes from 1 to 20 positional arguments but 21 were given)\n' @@ -601,10 +601,10 @@ def test_write_csv1(self): def test_impl(df, fname): df.to_csv(fname) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 111 df = pd.DataFrame({'A': np.arange(n)}) - hp_fname = 'test_write_csv1_hpat.csv' + hp_fname = 'test_write_csv1_sdc.csv' pd_fname = 'test_write_csv1_pd.csv' hpat_func(df, hp_fname) test_impl(df, pd_fname) @@ -612,13 +612,13 @@ def test_impl(df, fname): pd.testing.assert_frame_equal(pd.read_csv(hp_fname), pd.read_csv(pd_fname)) @unittest.skip('AttributeError: Failed in hpat mode pipeline (step: convert to distributed)\n' - 'module \'hpat.hio\' has no attribute \'file_write_parallel\'') + 'module \'sdc.hio\' has no attribute \'file_write_parallel\'') def test_write_csv_parallel1(self): def test_impl(n, fname): df = pd.DataFrame({'A': np.arange(n)}) df.to_csv(fname) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 111 hp_fname = 'test_write_csv1_hpat_par.csv' pd_fname = 'test_write_csv1_pd_par.csv' @@ -636,7 +636,7 @@ def test_impl(): A = np.fromfile("np_file1.dat", np.float64) return A - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) np.testing.assert_almost_equal(hpat_func(), test_impl()) def test_np_io2(self): @@ -645,7 +645,7 @@ def test_impl(): A = 
np.fromfile("np_file1.dat", np.float64) return A.sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) np.testing.assert_almost_equal(hpat_func(), test_impl()) self.assertEqual(count_array_REPs(), 0) self.assertEqual(count_parfor_REPs(), 0) @@ -655,7 +655,7 @@ def test_impl(A): if get_rank() == 0: A.tofile("np_file_3.dat") - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 111 A = np.random.ranf(n) hpat_func(A) @@ -669,7 +669,7 @@ def test_impl(n): A = np.arange(n) A.tofile("np_file_3.dat") - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 111 A = np.arange(n) hpat_func(n) diff --git a/hpat/tests/test_join.py b/sdc/tests/test_join.py similarity index 94% rename from hpat/tests/test_join.py rename to sdc/tests/test_join.py index e94bafc1b..34ae874c0 100644 --- a/hpat/tests/test_join.py +++ b/sdc/tests/test_join.py @@ -34,9 +34,9 @@ import pyarrow.parquet as pq from pandas.api.types import CategoricalDtype import numba -import hpat -from hpat.str_arr_ext import StringArray -from hpat.tests.test_utils import (count_array_REPs, count_parfor_REPs, +import sdc +from sdc.str_arr_ext import StringArray +from sdc.tests.test_utils import (count_array_REPs, count_parfor_REPs, count_parfor_OneDs, count_array_OneDs, dist_IR_contains, get_start_end) @@ -50,7 +50,7 @@ def test_impl(n): df3 = pd.merge(df1, df2, left_on='key1', right_on='key2') return df3.B.sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 self.assertEqual(hpat_func(n), test_impl(n)) self.assertEqual(count_array_REPs(), 0) @@ -63,7 +63,7 @@ def test_impl(df1, df2): df3 = df1.merge(df2, left_on='key1', right_on='key2') return df3 - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 df1 = pd.DataFrame({'key1': np.arange(n) + 3, 'A': np.arange(n) + 1.0}) df2 = pd.DataFrame({'key2': 2 * np.arange(n) + 1, 'B': n + np.arange(n) + 1.0}) @@ -80,7 +80,7 @@ def test_impl(): df3 = pd.merge(df1, df2, left_on='key1', 
right_on='key2') return df3.B - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) self.assertEqual(set(hpat_func()), set(test_impl())) def test_join1_seq_str_na(self): @@ -91,14 +91,14 @@ def test_impl(): df3 = df1.merge(df2, left_on='key1', right_on='key2', how='left') return df3.B - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) self.assertEqual(set(hpat_func()), set(test_impl())) def test_join_mutil_seq1(self): def test_impl(df1, df2): return df1.merge(df2, on=['A', 'B']) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df1 = pd.DataFrame({'A': [3, 1, 1, 3, 4], 'B': [1, 2, 3, 2, 3], 'C': [7, 8, 9, 4, 5]}) @@ -116,7 +116,7 @@ def test_impl(A1, B1, C1, A2, B2, D2): df3 = df1.merge(df2, on=['A', 'B']) return df3.C.sum() + df3.D.sum() - hpat_func = hpat.jit(locals={ + hpat_func = sdc.jit(locals={ 'A1:input': 'distributed', 'B1:input': 'distributed', 'C1:input': 'distributed', @@ -157,7 +157,7 @@ def test_impl(A1, B1, C1, A2, B2, D2): df3 = df1.merge(df2, on=('A', 'B')) return df3.C.sum() + df3.D.sum() - hpat_func = hpat.jit(locals={ + hpat_func = sdc.jit(locals={ 'A1:input': 'distributed', 'B1:input': 'distributed', 'C1:input': 'distributed', })(test_impl) @@ -191,7 +191,7 @@ def test_join_datetime_seq1(self): def test_impl(df1, df2): return pd.merge(df1, df2, on='time') - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df1 = pd.DataFrame( {'time': pd.DatetimeIndex( ['2017-01-03', '2017-01-06', '2017-02-21']), 'B': [4, 5, 6]}) @@ -206,7 +206,7 @@ def test_impl(df1, df2): df3 = pd.merge(df1, df2, on='time') return (df3.A.sum(), df3.time.max(), df3.B.sum()) - hpat_func = hpat.jit(distributed=['df1', 'df2'])(test_impl) + hpat_func = sdc.jit(distributed=['df1', 'df2'])(test_impl) df1 = pd.DataFrame( {'time': pd.DatetimeIndex( ['2017-01-03', '2017-01-06', '2017-02-21']), 'B': [4, 5, 6]}) @@ -225,7 +225,7 @@ def test_merge_asof_seq1(self): def test_impl(df1, df2): return pd.merge_asof(df1, df2, on='time') 
- hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df1 = pd.DataFrame( {'time': pd.DatetimeIndex( ['2017-01-03', '2017-01-06', '2017-02-21']), 'B': [4, 5, 6]}) @@ -243,14 +243,14 @@ def test_impl(): df3 = pd.merge_asof(df1, df2, on='time') return (df3.A.sum(), df3.time.max(), df3.B.sum()) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) self.assertEqual(hpat_func(), test_impl()) def test_join_left_seq1(self): def test_impl(df1, df2): return pd.merge(df1, df2, how='left', on='key') - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df1 = pd.DataFrame( {'key': [2, 3, 5, 1, 2, 8], 'A': np.array([4, 6, 3, 9, 9, -1], np.float)}) df2 = pd.DataFrame( @@ -267,7 +267,7 @@ def test_join_left_seq2(self): def test_impl(df1, df2): return pd.merge(df1, df2, how='left', on='key') - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) # test left run where a key is repeated on left but not right side df1 = pd.DataFrame( {'key': [2, 3, 5, 3, 2, 8], 'A': np.array([4, 6, 3, 9, 9, -1], np.float)}) @@ -285,7 +285,7 @@ def test_join_right_seq1(self): def test_impl(df1, df2): return pd.merge(df1, df2, how='right', on='key') - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df1 = pd.DataFrame( {'key': [2, 3, 5, 1, 2, 8], 'A': np.array([4, 6, 3, 9, 9, -1], np.float)}) df2 = pd.DataFrame( @@ -302,7 +302,7 @@ def test_join_outer_seq1(self): def test_impl(df1, df2): return pd.merge(df1, df2, how='outer', on='key') - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df1 = pd.DataFrame( {'key': [2, 3, 5, 1, 2, 8], 'A': np.array([4, 6, 3, 9, 9, -1], np.float)}) df2 = pd.DataFrame( @@ -323,7 +323,7 @@ def test_impl(df1, df2, df3, df4): o2 = df3.merge(df4, on=['B']) return o1, o2 - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 df1 = pd.DataFrame({'A': np.arange(n) + 3, 'AA': np.arange(n) + 1.0}) df2 = pd.DataFrame({'A': 2 * np.arange(n) + 1, 'AAA': n + np.arange(n) + 1.0}) @@ 
-345,7 +345,7 @@ def test_impl(): df3 = df1.merge(df2, on='C1') return df3 - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) pd.testing.assert_frame_equal(hpat_func(), test_impl()) @unittest.skipIf(platform.system() == 'Windows', "error on windows") @@ -363,7 +363,7 @@ def test_impl(): df3 = df1.merge(df2, on='C1', how='right') return df3 - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) pd.testing.assert_frame_equal( hpat_func().sort_values('C1').reset_index(drop=True), test_impl().sort_values('C1').reset_index(drop=True)) @@ -383,7 +383,7 @@ def test_impl(): df3 = df1.merge(df2, on='C1') return df3 - hpat_func = hpat.jit(distributed=['df3'])(test_impl) + hpat_func = sdc.jit(distributed=['df3'])(test_impl) # TODO: check results self.assertTrue((hpat_func().columns == test_impl().columns).all()) diff --git a/hpat/tests/test_ml.py b/sdc/tests/test_ml.py similarity index 94% rename from hpat/tests/test_ml.py rename to sdc/tests/test_ml.py index d5a8ae9c2..40d27d85d 100644 --- a/hpat/tests/test_ml.py +++ b/sdc/tests/test_ml.py @@ -30,8 +30,8 @@ import numpy as np from math import sqrt import numba -import hpat -from hpat.tests.test_utils import (count_array_REPs, count_parfor_REPs, +import sdc +from sdc.tests.test_utils import (count_array_REPs, count_parfor_REPs, count_parfor_OneDs, count_array_OneDs, count_parfor_OneD_Vars, count_array_OneD_Vars, dist_IR_contains, check_numba_version) @@ -50,7 +50,7 @@ def test_impl(n, d): w -= np.dot(((1.0 / (1.0 + np.exp(-Y * np.dot(X, w))) - 1.0) * Y), X) return w - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 d = 4 np.testing.assert_allclose(hpat_func(n, d), test_impl(n, d)) @@ -71,7 +71,7 @@ def test_impl(N, D): accuracy = np.sum(R == Y) / N return accuracy - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 d = 4 np.testing.assert_approx_equal(hpat_func(n, d), test_impl(n, d)) @@ -90,7 +90,7 @@ def test_impl(N, D): w -= alphaN * np.dot(X.T, 
np.dot(X, w) - Y) return w - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 d = 4 np.testing.assert_allclose(hpat_func(n, d), test_impl(n, d)) @@ -104,14 +104,14 @@ def test_impl(n): points = np.array([-1.0, 2.0, 5.0]) N = points.shape[0] exps = 0 - for i in hpat.prange(n): + for i in sdc.prange(n): p = X[i] d = (-(p - points)**2) / (2 * b**2) m = np.min(d) exps += m - np.log(b * N) + np.log(np.sum(np.exp(d - m))) return exps - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 np.testing.assert_approx_equal(hpat_func(n), test_impl(n)) self.assertEqual(count_array_OneDs(), 1) @@ -134,7 +134,7 @@ def test_impl(numCenter, numIter, N, D): return centroids - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) n = 11 np.testing.assert_allclose(hpat_func(1, 1, n, 2), test_impl(1, 1, n, 2)) self.assertEqual(count_array_OneDs(), 4) diff --git a/hpat/tests/test_rolling.py b/sdc/tests/test_rolling.py similarity index 94% rename from hpat/tests/test_rolling.py rename to sdc/tests/test_rolling.py index 9c1b1f97e..2153621bc 100644 --- a/hpat/tests/test_rolling.py +++ b/sdc/tests/test_rolling.py @@ -32,10 +32,10 @@ import platform import numpy as np import numba -import hpat -from hpat.tests.test_utils import (count_array_REPs, count_parfor_REPs, +import sdc +from sdc.tests.test_utils import (count_array_REPs, count_parfor_REPs, count_parfor_OneDs, count_array_OneDs, dist_IR_contains) -from hpat.hiframes.rolling import supported_rolling_funcs +from sdc.hiframes.rolling import supported_rolling_funcs LONG_TEST = (int(os.environ['SDC_LONG_ROLLING_TEST']) != 0 if 'SDC_LONG_ROLLING_TEST' in os.environ else False) @@ -59,7 +59,7 @@ def test_fixed1(self): loc_vars = {} exec(func_text, {}, loc_vars) test_impl = loc_vars['test_impl'] - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) for args in itertools.product(wins, centers): df = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]}) @@ -80,7 +80,7 @@ def 
test_fixed2(self): loc_vars = {} exec(func_text, {}, loc_vars) test_impl = loc_vars['test_impl'] - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) for n, w, c in itertools.product(sizes, wins, centers): df = pd.DataFrame({'B': np.arange(n)}) pd.testing.assert_frame_equal(hpat_func(df, w, c), test_impl(df, w, c)) @@ -89,7 +89,7 @@ def test_fixed_apply1(self): # test sequentially with manually created dfs def test_impl(df, w, c): return df.rolling(w, center=c).apply(lambda a: a.sum()) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) wins = (3,) if LONG_TEST: wins = (2, 3, 5) @@ -104,7 +104,7 @@ def test_fixed_apply2(self): # test sequentially with generated dfs def test_impl(df, w, c): return df.rolling(w, center=c).apply(lambda a: a.sum()) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) sizes = (121,) wins = (3,) if LONG_TEST: @@ -121,7 +121,7 @@ def test_impl(n, w, center): R = df.rolling(w, center=center).sum() return R.B.sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) sizes = (121,) wins = (5,) if LONG_TEST: @@ -140,7 +140,7 @@ def test_impl(n, w, center): R = df.rolling(w, center=center).apply(lambda a: a.sum()) return R.B.sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) sizes = (121,) wins = (5,) if LONG_TEST: @@ -176,7 +176,7 @@ def test_variable1(self): loc_vars = {} exec(func_text, {}, loc_vars) test_impl = loc_vars['test_impl'] - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) # XXX: skipping min/max for this test since the behavior of Pandas # is inconsistent: it assigns NaN to last output instead of 4! 
if func_name not in ('min', 'max'): @@ -196,7 +196,7 @@ def test_variable2(self): loc_vars = {} exec(func_text, {}, loc_vars) test_impl = loc_vars['test_impl'] - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) for n in sizes: time = pd.date_range(start='1/1/2018', periods=n, freq='s') df = pd.DataFrame({'B': np.arange(n), 'time': time}) @@ -225,7 +225,7 @@ def test_variable_apply1(self): loc_vars = {} exec(func_text, {}, loc_vars) test_impl = loc_vars['test_impl'] - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) pd.testing.assert_frame_equal(hpat_func(df1), test_impl(df1)) pd.testing.assert_frame_equal(hpat_func(df2), test_impl(df2)) @@ -243,7 +243,7 @@ def test_variable_apply2(self): loc_vars = {} exec(func_text, {}, loc_vars) test_impl = loc_vars['test_impl'] - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) for n in sizes: time = pd.date_range(start='1/1/2018', periods=n, freq='s') df = pd.DataFrame({'B': np.arange(n), 'time': time}) @@ -267,7 +267,7 @@ def test_variable_parallel1(self): loc_vars = {} exec(func_text, {'pd': pd, 'np': np}, loc_vars) test_impl = loc_vars['test_impl'] - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) for n in sizes: np.testing.assert_almost_equal(hpat_func(n), test_impl(n)) self.assertEqual(count_array_REPs(), 0) @@ -291,7 +291,7 @@ def test_variable_apply_parallel1(self): loc_vars = {} exec(func_text, {'pd': pd, 'np': np}, loc_vars) test_impl = loc_vars['test_impl'] - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) for n in sizes: np.testing.assert_almost_equal(hpat_func(n), test_impl(n)) self.assertEqual(count_array_REPs(), 0) @@ -311,7 +311,7 @@ def test_series_fixed1(self): loc_vars = {} exec(func_text, {}, loc_vars) test_impl = loc_vars['test_impl'] - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) for args in itertools.product(wins, centers): pd.testing.assert_series_equal(hpat_func(S1, *args), test_impl(S1, *args)) 
pd.testing.assert_series_equal(hpat_func(S2, *args), test_impl(S2, *args)) @@ -319,7 +319,7 @@ def test_series_fixed1(self): def apply_test_impl(S, w, c): return S.rolling(w, center=c).apply(lambda a: a.sum()) - hpat_func = hpat.jit(apply_test_impl) + hpat_func = sdc.jit(apply_test_impl) for args in itertools.product(wins, centers): pd.testing.assert_series_equal(hpat_func(S1, *args), apply_test_impl(S1, *args)) pd.testing.assert_series_equal(hpat_func(S2, *args), apply_test_impl(S2, *args)) @@ -336,14 +336,14 @@ def test_series_cov1(self): def test_impl(S, S2, w, c): return S.rolling(w, center=c).cov(S2) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) for args in itertools.product([S1, S2], [S1, S2], wins, centers): pd.testing.assert_series_equal(hpat_func(*args), test_impl(*args)) pd.testing.assert_series_equal(hpat_func(*args), test_impl(*args)) def test_impl2(S, S2, w, c): return S.rolling(w, center=c).corr(S2) - hpat_func = hpat.jit(test_impl2) + hpat_func = sdc.jit(test_impl2) for args in itertools.product([S1, S2], [S1, S2], wins, centers): pd.testing.assert_series_equal(hpat_func(*args), test_impl2(*args)) pd.testing.assert_series_equal(hpat_func(*args), test_impl2(*args)) @@ -360,14 +360,14 @@ def test_df_cov1(self): def test_impl(df, df2, w, c): return df.rolling(w, center=c).cov(df2) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) for args in itertools.product([df1, df2], [df1, df2], wins, centers): pd.testing.assert_frame_equal(hpat_func(*args), test_impl(*args)) pd.testing.assert_frame_equal(hpat_func(*args), test_impl(*args)) def test_impl2(df, df2, w, c): return df.rolling(w, center=c).corr(df2) - hpat_func = hpat.jit(test_impl2) + hpat_func = sdc.jit(test_impl2) for args in itertools.product([df1, df2], [df1, df2], wins, centers): pd.testing.assert_frame_equal(hpat_func(*args), test_impl2(*args)) pd.testing.assert_frame_equal(hpat_func(*args), test_impl2(*args)) diff --git a/hpat/tests/test_series.py 
b/sdc/tests/test_series.py similarity index 96% rename from hpat/tests/test_series.py rename to sdc/tests/test_series.py index 559893aca..3a948178f 100644 --- a/hpat/tests/test_series.py +++ b/sdc/tests/test_series.py @@ -32,11 +32,11 @@ import pandas as pd import numpy as np import pyarrow.parquet as pq -import hpat +import sdc from itertools import islice, permutations -from hpat.tests.test_utils import ( +from sdc.tests.test_utils import ( count_array_REPs, count_parfor_REPs, count_array_OneDs, get_start_end) -from hpat.tests.gen_test_data import ParquetGenerator +from sdc.tests.gen_test_data import ParquetGenerator from numba import types from numba.config import IS_32BITS from numba.errors import TypingError @@ -212,7 +212,7 @@ def _make_func_use_method_arg1(method): class TestSeries(unittest.TestCase): def jit(self, *args, **kwargs): - return hpat.jit(*args, **kwargs) + return sdc.jit(*args, **kwargs) def test_create1(self): def test_impl(): @@ -558,7 +558,7 @@ def test_impl(A, deep): self.assertEqual(actual.index is S.index, expected.index is S.index) self.assertEqual(actual.index is S.index, not deep) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, 'Series.corr() parameter "min_periods" unsupported') def test_series_corr(self): def test_series_corr_impl(S1, S2, min_periods=None): @@ -586,7 +586,7 @@ def test_series_corr_impl(S1, S2, min_periods=None): result = hpat_func(S1, S2, min_periods=period) np.testing.assert_allclose(result, result_ref) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, 'Series.corr() parameter "min_periods" unsupported') def test_series_corr_unsupported_dtype(self): def test_series_corr_impl(S1, S2, min_periods=None): @@ -608,7 +608,7 @@ def test_series_corr_impl(S1, S2, min_periods=None): msg = 'Method corr(). 
The object self.data' self.assertIn(msg, str(raises.exception)) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, 'Series.corr() parameter "min_periods" unsupported') def test_series_corr_unsupported_period(self): def test_series_corr_impl(S1, S2, min_periods=None): @@ -1296,7 +1296,7 @@ def test_impl(A): df = pd.DataFrame({'A': np.arange(n)}) self.assertTrue(isinstance(hpat_func(df.A), np.ndarray)) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, 'No support of axis argument in old-style Series.fillna() impl') def test_series_fillna_axis1(self): '''Verifies Series.fillna() implementation handles 'index' as axis argument''' @@ -1307,7 +1307,7 @@ def test_impl(S): S = pd.Series([1.0, 2.0, np.nan, 1.0, np.inf]) pd.testing.assert_series_equal(hpat_func(S), test_impl(S)) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, 'No support of axis argument in old-style Series.fillna() impl') def test_series_fillna_axis2(self): '''Verifies Series.fillna() implementation handles 0 as axis argument''' @@ -1318,7 +1318,7 @@ def test_impl(S): S = pd.Series([1.0, 2.0, np.nan, 1.0, np.inf]) pd.testing.assert_series_equal(hpat_func(S), test_impl(S)) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, 'No support of axis argument in old-style Series.fillna() impl') def test_series_fillna_axis3(self): '''Verifies Series.fillna() implementation handles correct non-literal axis argument''' @@ -1330,7 +1330,7 @@ def test_impl(S, axis): for axis in [0, 'index']: pd.testing.assert_series_equal(hpat_func(S, axis), test_impl(S, axis)) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, 'BUG: old-style fillna impl returns series without index') def 
test_series_fillna_float_from_df(self): '''Verifies Series.fillna() applied to a named float Series obtained from a DataFrame''' @@ -1342,7 +1342,7 @@ def test_impl(S): df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0, np.inf]}) pd.testing.assert_series_equal(hpat_func(df.A), test_impl(df.A), check_names=False) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, 'BUG: old-style fillna impl returns series without index') def test_series_fillna_float_index1(self): '''Verifies Series.fillna() implementation for float series with default index''' @@ -1354,7 +1354,7 @@ def test_impl(S): S = pd.Series(data) pd.testing.assert_series_equal(hpat_func(S), test_impl(S)) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, 'BUG: old-style fillna impl returns series without index') def test_series_fillna_float_index2(self): '''Verifies Series.fillna() implementation for float series with string index''' @@ -1365,7 +1365,7 @@ def test_impl(S): S = pd.Series([1.0, 2.0, np.nan, 1.0, np.inf], ['a', 'b', 'c', 'd', 'e']) pd.testing.assert_series_equal(hpat_func(S), test_impl(S)) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, 'BUG: old-style fillna impl returns series without index') def test_series_fillna_float_index3(self): def test_impl(S): @@ -1375,7 +1375,7 @@ def test_impl(S): S = pd.Series([1.0, 2.0, np.nan, 1.0, np.inf], index=[1, 2, 5, 7, 10]) pd.testing.assert_series_equal(hpat_func(S), test_impl(S)) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, 'BUG: old-style fillna impl returns series without index') def test_series_fillna_str_from_df(self): '''Verifies Series.fillna() applied to a named float Series obtained from a DataFrame''' @@ -1388,7 +1388,7 @@ def test_impl(S): 
pd.testing.assert_series_equal(hpat_func(df.A), test_impl(df.A), check_names=False) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, 'BUG: old-style fillna impl returns series without index') def test_series_fillna_str_index1(self): '''Verifies Series.fillna() implementation for series of strings with default index''' @@ -1399,7 +1399,7 @@ def test_impl(S): S = pd.Series(['aa', 'b', None, 'cccd', '']) pd.testing.assert_series_equal(hpat_func(S), test_impl(S)) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, 'BUG: old-style fillna impl returns series without index') def test_series_fillna_str_index2(self): '''Verifies Series.fillna() implementation for series of strings with string index''' @@ -1410,7 +1410,7 @@ def test_impl(S): S = pd.Series(['aa', 'b', None, 'cccd', ''], ['a', 'b', 'c', 'd', 'e']) pd.testing.assert_series_equal(hpat_func(S), test_impl(S)) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, 'BUG: old-style fillna impl returns series without index') def test_series_fillna_str_index3(self): def test_impl(S): @@ -1421,7 +1421,7 @@ def test_impl(S): S = pd.Series(['aa', 'b', None, 'cccd', ''], index=[1, 2, 5, 7, 10]) pd.testing.assert_series_equal(hpat_func(S), test_impl(S)) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, 'BUG: old-style fillna impl returns series without index') def test_series_fillna_float_inplace1(self): '''Verifies Series.fillna() implementation for float series with default index and inplace argument True''' @@ -1466,10 +1466,10 @@ def test_impl(S, param): hpat_func = self.jit(test_impl) S = pd.Series([1.0, 2.0, np.nan, 1.0, np.inf]) - expected = ValueError if hpat.config.config_pipeline_hpat_default else TypingError + expected = ValueError if 
sdc.config.config_pipeline_hpat_default else TypingError self.assertRaises(expected, hpat_func, S, True) - @unittest.skipUnless(hpat.config.config_pipeline_hpat_default, + @unittest.skipUnless(sdc.config.config_pipeline_hpat_default, 'TODO: investigate why Numba types inplace as bool (non-literal value)') def test_series_fillna_str_inplace1(self): '''Verifies Series.fillna() implementation for series of strings @@ -1498,7 +1498,7 @@ def test_impl(S): self.assertIsNone(test_impl(S2)) pd.testing.assert_series_equal(S1, S2) - @unittest.skipUnless(hpat.config.config_pipeline_hpat_default, + @unittest.skipUnless(sdc.config.config_pipeline_hpat_default, 'TODO: investigate why Numba types inplace as bool (non-literal value)') def test_series_fillna_str_inplace_empty1(self): def test_impl(A): @@ -1545,7 +1545,7 @@ def test_impl(S): S2 = S1.copy() pd.testing.assert_series_equal(hpat_func(S1), test_impl(S2)) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, 'BUG: old-style fillna impl returns series without index') def test_series_fillna_int_no_index1(self): '''Verifies Series.fillna() implementation for integer series with default index''' @@ -1558,7 +1558,7 @@ def test_impl(S): S2 = S1.copy() pd.testing.assert_series_equal(hpat_func(S1), test_impl(S2)) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, 'No support of axis argument in old-style Series.dropna() impl') def test_series_dropna_axis1(self): '''Verifies Series.dropna() implementation handles 'index' as axis argument''' @@ -1570,7 +1570,7 @@ def test_impl(S): S2 = S1.copy() pd.testing.assert_series_equal(hpat_func(S1), test_impl(S2)) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, 'No support of axis argument in old-style Series.dropna() impl') def test_series_dropna_axis2(self): '''Verifies 
Series.dropna() implementation handles 0 as axis argument''' @@ -1582,7 +1582,7 @@ def test_impl(S): S2 = S1.copy() pd.testing.assert_series_equal(hpat_func(S1), test_impl(S2)) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, 'No support of axis argument in old-style Series.dropna() impl') def test_series_dropna_axis3(self): '''Verifies Series.dropna() implementation handles correct non-literal axis argument''' @@ -1595,7 +1595,7 @@ def test_impl(S, axis): for axis in [0, 'index']: pd.testing.assert_series_equal(hpat_func(S1, axis), test_impl(S2, axis)) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, 'BUG: old-style dropna impl returns series without index') def test_series_dropna_float_index1(self): '''Verifies Series.dropna() implementation for float series with default index''' @@ -1608,7 +1608,7 @@ def test_impl(S): S2 = S1.copy() pd.testing.assert_series_equal(hpat_func(S1), test_impl(S2)) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, 'BUG: old-style dropna impl returns series without index') def test_series_dropna_float_index2(self): '''Verifies Series.dropna() implementation for float series with string index''' @@ -1620,7 +1620,7 @@ def test_impl(S): S2 = S1.copy() pd.testing.assert_series_equal(hpat_func(S1), test_impl(S2)) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, 'BUG: old-style dropna impl returns series without index') def test_series_dropna_str_index1(self): '''Verifies Series.dropna() implementation for series of strings with default index''' @@ -1632,7 +1632,7 @@ def test_impl(S): S2 = S1.copy() pd.testing.assert_series_equal(hpat_func(S1), test_impl(S2)) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + 
@unittest.skipIf(sdc.config.config_pipeline_hpat_default, 'BUG: old-style dropna impl returns series without index') def test_series_dropna_str_index2(self): '''Verifies Series.dropna() implementation for series of strings with string index''' @@ -1644,7 +1644,7 @@ def test_impl(S): S2 = S1.copy() pd.testing.assert_series_equal(hpat_func(S1), test_impl(S2)) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, 'BUG: old-style dropna impl returns series without index') def test_series_dropna_str_index3(self): def test_impl(S): @@ -1747,7 +1747,7 @@ def test_impl(S): S2 = S1.copy() pd.testing.assert_series_equal(hpat_func(S1), test_impl(S2)) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, 'BUG: old-style dropna impl returns series without index') def test_series_dropna_int_no_index1(self): '''Verifies Series.dropna() implementation for integer series with default index''' @@ -1794,7 +1794,7 @@ def test_impl(S): S = pd.Series([np.nan, np.nan]) self.assertEqual(hpat_func(S), test_impl(S)) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, "Old style Series.sum() does not support parameters") + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, "Old style Series.sum() does not support parameters") def test_series_sum_skipna_false(self): def test_impl(S): return S.sum(skipna=False) @@ -1803,7 +1803,7 @@ def test_impl(S): S = pd.Series([np.nan, 2., 3.]) self.assertEqual(np.isnan(hpat_func(S)), np.isnan(test_impl(S))) - @unittest.skipIf(not hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(not sdc.config.config_pipeline_hpat_default, "Series.sum() operator + is not implemented yet for Numba") def test_series_sum2(self): def test_impl(S): @@ -1893,7 +1893,7 @@ def test_impl(S): else: self.assertEqual(actual, expected) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, "Series.mean() any parameters 
unsupported") + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, "Series.mean() any parameters unsupported") def test_series_mean_skipna(self): def test_impl(S, skipna): return S.mean(skipna=skipna) @@ -1943,7 +1943,7 @@ def test_impl(S): result = hpat_func(S) self.assertEqual(result, result_ref) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, "Series.min() any parameters unsupported") + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, "Series.min() any parameters unsupported") def test_series_min_param(self): def test_impl(S, param_skipna): return S.min(skipna=param_skipna) @@ -1973,7 +1973,7 @@ def test_impl(S): result = hpat_func(S) self.assertEqual(result, result_ref) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, "Series.max() any parameters unsupported") + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, "Series.max() any parameters unsupported") def test_series_max_param(self): def test_impl(S, param_skipna): return S.max(skipna=param_skipna) @@ -2323,7 +2323,7 @@ def test_series_str2str(self): common_methods = ['lower', 'upper'] sdc_methods = ['capitalize', 'lstrip', 'rstrip', 'strip', 'swapcase', 'title'] str2str_methods = common_methods[:] - if hpat.config.config_pipeline_hpat_default: + if sdc.config.config_pipeline_hpat_default: str2str_methods += sdc_methods for method in str2str_methods: @@ -2338,7 +2338,7 @@ def test_series_str2str(self): pd.testing.assert_series_equal(hpat_func(S), test_impl(S), check_names=method in common_methods) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, 'Series.str.() unsupported') def test_series_str2str_unsupported(self): unsupported_methods = ['capitalize', 'lstrip', 'rstrip', @@ -2348,7 +2348,7 @@ def test_series_str2str_unsupported(self): ' return S.str.{}()'.format(method)] func_text = '\n'.join(func_lines) test_impl = _make_func_from_text(func_text) - hpat_func = hpat.jit(test_impl) + 
hpat_func = sdc.jit(test_impl) S = pd.Series([' \tbbCD\t ', 'ABC', ' mCDm\t', 'abc']) # TypingError with expected message is raised internally by Numba @@ -2357,7 +2357,7 @@ def test_series_str2str_unsupported(self): expected_msg = 'Series.str.{} is not supported yet'.format(method) self.assertIn(expected_msg, str(raises.exception)) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, "Old-style append implementation doesn't handle ignore_index argument") def test_series_append_single_ignore_index(self): '''Verify Series.append() concatenates Series with other single Series ignoring indexes''' @@ -2373,7 +2373,7 @@ def test_impl(S, other): S1, S2 = [pd.Series(data) for data in data_list] pd.testing.assert_series_equal(hpat_func(S1, S2), test_impl(S1, S2)) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, "Old-style append implementation doesn't handle ignore_index argument") def test_series_append_list_ignore_index(self): '''Verify Series.append() concatenates Series with list of other Series ignoring indexes''' @@ -2382,7 +2382,7 @@ def test_impl(S1, S2, S3): hpat_func = self.jit(test_impl) dtype_to_data = {'float': [[-2., 3., 9.1], [-2., 5.0], [1.0]]} - if not hpat.config.config_pipeline_hpat_default: + if not sdc.config.config_pipeline_hpat_default: dtype_to_data['string'] = [['a', None, ''], ['d', None], ['']] for dtype, data_list in dtype_to_data.items(): @@ -2398,7 +2398,7 @@ def test_impl(S1, S2, S3): hpat_func = self.jit(test_impl) dtype_to_data = {'float': [[-2., 3., 9.1], [-2., 5.0], [1.0]]} - if not hpat.config.config_pipeline_hpat_default: + if not sdc.config.config_pipeline_hpat_default: dtype_to_data['string'] = [['a', None, ''], ['d', None], ['']] for dtype, data_list in dtype_to_data.items(): @@ -2406,7 +2406,7 @@ def test_impl(S1, S2, S3): S1, S2, S3 = [pd.Series(data) for data in data_list] 
pd.testing.assert_series_equal(hpat_func(S1, S2, S3), test_impl(S1, S2, S3)) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, "BUG: old-style append implementation doesn't handle series index") def test_series_append_single_index_default(self): '''Verify Series.append() concatenates Series with other single Series respecting default indexes''' @@ -2415,7 +2415,7 @@ def test_impl(S, other): hpat_func = self.jit(test_impl) dtype_to_data = {'float': [[-2., 3., 9.1], [-2., 5.0]]} - if not hpat.config.config_pipeline_hpat_default: + if not sdc.config.config_pipeline_hpat_default: dtype_to_data['string'] = [['a', None, 'bbbb', ''], ['dd', None, '', 'e']] for dtype, data_list in dtype_to_data.items(): @@ -2423,7 +2423,7 @@ def test_impl(S, other): S1, S2 = [pd.Series(data) for data in data_list] pd.testing.assert_series_equal(hpat_func(S1, S2), test_impl(S1, S2)) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, "BUG: old-style append implementation doesn't handle series index") def test_series_append_list_index_default(self): '''Verify Series.append() concatenates Series with list of other Series respecting default indexes''' @@ -2432,7 +2432,7 @@ def test_impl(S1, S2, S3): hpat_func = self.jit(test_impl) dtype_to_data = {'float': [[-2., 3., 9.1], [-2., 5.0], [1.0]]} - if not hpat.config.config_pipeline_hpat_default: + if not sdc.config.config_pipeline_hpat_default: dtype_to_data['string'] = [['a', 'b', 'q'], ['d', 'e'], ['s']] for dtype, data_list in dtype_to_data.items(): @@ -2448,7 +2448,7 @@ def test_impl(S1, S2, S3): hpat_func = self.jit(test_impl) dtype_to_data = {'float': [[-2., 3., 9.1], [-2., 5.0], [1.0]]} - if not hpat.config.config_pipeline_hpat_default: + if not sdc.config.config_pipeline_hpat_default: dtype_to_data['string'] = [['a', 'b', 'q'], ['d', 'e'], ['s']] for dtype, data_list in dtype_to_data.items(): @@ 
-2456,7 +2456,7 @@ def test_impl(S1, S2, S3): S1, S2, S3 = [pd.Series(data) for data in data_list] pd.testing.assert_series_equal(hpat_func(S1, S2, S3), test_impl(S1, S2, S3)) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, "BUG: old-style append implementation doesn't handle series index") def test_series_append_single_index_int(self): '''Verify Series.append() concatenates Series with other single Series respecting integer indexes''' @@ -2465,7 +2465,7 @@ def test_impl(S, other): hpat_func = self.jit(test_impl) dtype_to_data = {'float': [[-2., 3., 9.1, np.nan], [-2., 5.0, np.inf, 0, -1]]} - if not hpat.config.config_pipeline_hpat_default: + if not sdc.config.config_pipeline_hpat_default: dtype_to_data['string'] = [['a', None, 'bbbb', ''], ['dd', None, '', 'e', 'ttt']] indexes = [[1, 2, 3, 4], [7, 8, 11, 3, 4]] @@ -2474,7 +2474,7 @@ def test_impl(S, other): S1, S2 = [pd.Series(data, index=indexes[i]) for i, data in enumerate(data_list)] pd.testing.assert_series_equal(hpat_func(S1, S2), test_impl(S1, S2)) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, "BUG: old-style append implementation doesn't handle series index") def test_series_append_list_index_int(self): '''Verify Series.append() concatenates Series with list of other Series respecting integer indexes''' @@ -2483,7 +2483,7 @@ def test_impl(S1, S2, S3): hpat_func = self.jit(test_impl) dtype_to_data = {'float': [[-2., 3., 9.1, np.nan], [-2., 5.0, np.inf, 0], [-1.0]]} - if not hpat.config.config_pipeline_hpat_default: + if not sdc.config.config_pipeline_hpat_default: dtype_to_data['string'] = [['a', None, 'bbbb', ''], ['dd', None, '', 'e'], ['ttt']] indexes = [[1, 2, 3, 4], [7, 8, 11, 3], [4]] @@ -2500,7 +2500,7 @@ def test_impl(S1, S2, S3): hpat_func = self.jit(test_impl) dtype_to_data = {'float': [[-2., 3., 9.1, np.nan], [-2., 5.0, np.inf, 0], [-1.0]]} - if not 
hpat.config.config_pipeline_hpat_default: + if not sdc.config.config_pipeline_hpat_default: dtype_to_data['string'] = [['a', None, 'bbbb', ''], ['dd', None, '', 'e'], ['ttt']] indexes = [[1, 2, 3, 4], [7, 8, 11, 3], [4]] @@ -2509,7 +2509,7 @@ def test_impl(S1, S2, S3): S1, S2, S3 = [pd.Series(data, index=indexes[i]) for i, data in enumerate(data_list)] pd.testing.assert_series_equal(hpat_func(S1, S2, S3), test_impl(S1, S2, S3)) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, "BUG: old-style append implementation doesn't handle series index") def test_series_append_single_index_str(self): '''Verify Series.append() concatenates Series with other single Series respecting string indexes''' @@ -2518,7 +2518,7 @@ def test_impl(S, other): hpat_func = self.jit(test_impl) dtype_to_data = {'float': [[-2., 3., 9.1, np.nan], [-2., 5.0, np.inf, 0, -1.0]]} - if not hpat.config.config_pipeline_hpat_default: + if not sdc.config.config_pipeline_hpat_default: dtype_to_data['string'] = [['a', None, 'bbbb', ''], ['dd', None, '', 'e', 'ttt']] indexes = [['a', 'bb', 'ccc', 'dddd'], ['a1', 'a2', 'a3', 'a4', 'a5']] @@ -2527,7 +2527,7 @@ def test_impl(S, other): S1, S2 = [pd.Series(data, index=indexes[i]) for i, data in enumerate(data_list)] pd.testing.assert_series_equal(hpat_func(S1, S2), test_impl(S1, S2)) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, "BUG: old-style append implementation doesn't handle series index") def test_series_append_list_index_str(self): '''Verify Series.append() concatenates Series with list of other Series respecting string indexes''' @@ -2536,7 +2536,7 @@ def test_impl(S1, S2, S3): hpat_func = self.jit(test_impl) dtype_to_data = {'float': [[-2., 3., 9.1, np.nan], [-2., 5.0, np.inf, 0], [-1.0]]} - if not hpat.config.config_pipeline_hpat_default: + if not sdc.config.config_pipeline_hpat_default: 
dtype_to_data['string'] = [['a', None, 'bbbb', ''], ['dd', None, '', 'e'], ['ttt']] indexes = [['a', 'bb', 'ccc', 'dddd'], ['q', 't', 'a', 'x'], ['dd']] @@ -2553,7 +2553,7 @@ def test_impl(S1, S2, S3): hpat_func = self.jit(test_impl) dtype_to_data = {'float': [[-2., 3., 9.1, np.nan], [-2., 5.0, np.inf, 0], [-1.0]]} - if not hpat.config.config_pipeline_hpat_default: + if not sdc.config.config_pipeline_hpat_default: dtype_to_data['string'] = [['a', None, 'bbbb', ''], ['dd', None, '', 'e'], ['ttt']] indexes = [['a', 'bb', 'ccc', 'dddd'], ['q', 't', 'a', 'x'], ['dd']] @@ -2562,7 +2562,7 @@ def test_impl(S1, S2, S3): S1, S2, S3 = [pd.Series(data, index=indexes[i]) for i, data in enumerate(data_list)] pd.testing.assert_series_equal(hpat_func(S1, S2, S3), test_impl(S1, S2, S3)) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, "Old-style append implementation doesn't handle ignore_index argument") def test_series_append_ignore_index_literal(self): '''Verify Series.append() implementation handles ignore_index argument as Boolean literal''' @@ -2574,7 +2574,7 @@ def test_impl(S, other): S2 = pd.Series([-2., 5.0], ['a2', 'b2']) pd.testing.assert_series_equal(hpat_func(S1, S2), test_impl(S1, S2)) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, "Old-style append implementation doesn't handle ignore_index argument") def test_series_append_ignore_index_non_literal(self): '''Verify Series.append() implementation raises if ignore_index argument is not a Boolean literal''' @@ -2590,7 +2590,7 @@ def test_impl(S, other, param): msg = 'Method append(). The ignore_index must be a literal Boolean constant. 
Given: {}' self.assertIn(msg.format(types.bool_), str(raises.exception)) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, "BUG: old-style append implementation doesn't handle series index") def test_series_append_single_dtype_promotion(self): '''Verify Series.append() implementation handles appending single Series with different dtypes''' @@ -2602,7 +2602,7 @@ def test_impl(S, other): S2 = pd.Series([-2, 5], ['a2', 'b2']) pd.testing.assert_series_equal(hpat_func(S1, S2), test_impl(S1, S2)) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, "BUG: old-style append implementation doesn't handle series index") def test_series_append_list_dtype_promotion(self): '''Verify Series.append() implementation handles appending list of Series with different dtypes''' @@ -2791,7 +2791,7 @@ def test_impl(): return series.nlargest(4) hpat_func = self.jit(test_impl) - if hpat.config.config_pipeline_hpat_default: + if sdc.config.config_pipeline_hpat_default: np.testing.assert_array_equal(test_impl(), hpat_func()) else: pd.testing.assert_series_equal(test_impl(), hpat_func()) @@ -2806,12 +2806,12 @@ def test_impl(series, n): for n in range(-1, 10): ref_result = test_impl(series, n) jit_result = hpat_func(series, n) - if hpat.config.config_pipeline_hpat_default: + if sdc.config.config_pipeline_hpat_default: np.testing.assert_array_equal(ref_result, jit_result) else: pd.testing.assert_series_equal(ref_result, jit_result) - @unittest.skipIf(not hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(not sdc.config.config_pipeline_hpat_default, 'Series.nlargest() parallelism unsupported') def test_series_nlargest_parallel(self): # create `kde.parquet` file @@ -2823,14 +2823,14 @@ def test_impl(): return S.nlargest(4) hpat_func = self.jit(test_impl) - if hpat.config.config_pipeline_hpat_default: + if sdc.config.config_pipeline_hpat_default: 
np.testing.assert_array_equal(test_impl(), hpat_func()) else: pd.testing.assert_series_equal(test_impl(), hpat_func()) self.assertEqual(count_parfor_REPs(), 0) self.assertTrue(count_array_OneDs() > 0) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, 'Series.nlargest() parameter keep unsupported') def test_series_nlargest_full(self): def test_impl(series, n, keep): @@ -2855,7 +2855,7 @@ def test_impl(series, n): data_duplicated = data * 3 # TODO: add integer index not equal to range after index is fixed indexes = [range(len(data_duplicated))] - if not hpat.config.config_pipeline_hpat_default: + if not sdc.config.config_pipeline_hpat_default: indexes.append(gen_strlist(len(data_duplicated))) for index in indexes: @@ -2863,12 +2863,12 @@ def test_impl(series, n): for n in range(-1, 10): ref_result = test_impl(series, n) jit_result = hpat_func(series, n) - if hpat.config.config_pipeline_hpat_default: + if sdc.config.config_pipeline_hpat_default: np.testing.assert_array_equal(ref_result, jit_result) else: pd.testing.assert_series_equal(ref_result, jit_result) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, 'Series.nlargest() does not raise an exception') def test_series_nlargest_typing(self): _func_name = 'Method nlargest().' @@ -2892,7 +2892,7 @@ def test_impl(series, n, keep): msg = '{} The object keep\n given: {}\n expected: str' self.assertIn(msg.format(_func_name, dtype), str(raises.exception)) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, 'Series.nlargest() does not raise an exception') def test_series_nlargest_unsupported(self): msg = "Method nlargest(). Unsupported parameter. 
Given 'keep' != 'first'" @@ -2917,7 +2917,7 @@ def test_impl(): return series.nsmallest(4) hpat_func = self.jit(test_impl) - if hpat.config.config_pipeline_hpat_default: + if sdc.config.config_pipeline_hpat_default: np.testing.assert_array_equal(test_impl(), hpat_func()) else: pd.testing.assert_series_equal(test_impl(), hpat_func()) @@ -2932,12 +2932,12 @@ def test_impl(series, n): for n in range(-1, 10): ref_result = test_impl(series, n) jit_result = hpat_func(series, n) - if hpat.config.config_pipeline_hpat_default: + if sdc.config.config_pipeline_hpat_default: np.testing.assert_array_equal(ref_result, jit_result) else: pd.testing.assert_series_equal(ref_result, jit_result) - @unittest.skipIf(not hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(not sdc.config.config_pipeline_hpat_default, 'Series.nsmallest() parallelism unsupported') def test_series_nsmallest_parallel(self): # create `kde.parquet` file @@ -2949,14 +2949,14 @@ def test_impl(): return S.nsmallest(4) hpat_func = self.jit(test_impl) - if hpat.config.config_pipeline_hpat_default: + if sdc.config.config_pipeline_hpat_default: np.testing.assert_array_equal(test_impl(), hpat_func()) else: pd.testing.assert_series_equal(test_impl(), hpat_func()) self.assertEqual(count_parfor_REPs(), 0) self.assertTrue(count_array_OneDs() > 0) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, 'Series.nsmallest() parameter keep unsupported') def test_series_nsmallest_full(self): def test_impl(series, n, keep): @@ -2981,7 +2981,7 @@ def test_impl(series, n): data_duplicated = data * 3 # TODO: add integer index not equal to range after index is fixed indexes = [range(len(data_duplicated))] - if not hpat.config.config_pipeline_hpat_default: + if not sdc.config.config_pipeline_hpat_default: indexes.append(gen_strlist(len(data_duplicated))) for index in indexes: @@ -2989,12 +2989,12 @@ def test_impl(series, n): for n in range(-1, 10): ref_result = 
test_impl(series, n) jit_result = hpat_func(series, n) - if hpat.config.config_pipeline_hpat_default: + if sdc.config.config_pipeline_hpat_default: np.testing.assert_array_equal(ref_result, jit_result) else: pd.testing.assert_series_equal(ref_result, jit_result) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, 'Series.nsmallest() does not raise an exception') def test_series_nsmallest_typing(self): _func_name = 'Method nsmallest().' @@ -3018,7 +3018,7 @@ def test_impl(series, n, keep): msg = '{} The object keep\n given: {}\n expected: str' self.assertIn(msg.format(_func_name, dtype), str(raises.exception)) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, 'Series.nsmallest() does not raise an exception') def test_series_nsmallest_unsupported(self): msg = "Method nsmallest(). Unsupported parameter. Given 'keep' != 'first'" @@ -3235,7 +3235,7 @@ def test_impl(S): S = pd.Series(np.random.ranf(m)) self.assertEqual(hpat_func(S), test_impl(S)) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, "BUG: old-style median implementation doesn't filter NaNs") def test_series_median_skipna_default1(self): '''Verifies median implementation with default skipna=True argument on a series with NA values''' @@ -3246,7 +3246,7 @@ def test_impl(S): S = pd.Series([2., 3., 5., np.nan, 5., 6., 7.]) self.assertEqual(hpat_func(S), test_impl(S)) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, "Skipna argument is not supported in old-style") def test_series_median_skipna_false1(self): '''Verifies median implementation with skipna=False on a series with NA values''' @@ -3816,7 +3816,7 @@ def test_impl(): hpat_func = self.jit(test_impl) np.testing.assert_equal(hpat_func(), test_impl()) - 
@unittest.skipIf(hpat.config.config_pipeline_hpat_default, "Series.quantile() parameter as a list unsupported") + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, "Series.quantile() parameter as a list unsupported") def test_series_quantile_q_vector(self): def test_series_quantile_q_vector_impl(S, param1): return S.quantile(param1) @@ -3895,7 +3895,7 @@ def pyfunc(): result = cfunc() np.testing.assert_equal(ref_result, result) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, 'Series.std() parameters "skipna" and "ddof" unsupported') def test_series_std_unboxing(self): def pyfunc(series, skipna, ddof): @@ -3910,7 +3910,7 @@ def pyfunc(series, skipna, ddof): result = cfunc(series, skipna=skipna, ddof=ddof) np.testing.assert_equal(ref_result, result) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, 'Series.std() strings as input data unsupported') def test_series_std_str(self): def pyfunc(series): @@ -3923,7 +3923,7 @@ def pyfunc(series): msg = 'Method std(). The object must be a number. 
Given self.data.dtype: {}' self.assertIn(msg.format(types.unicode_type), str(raises.exception)) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, 'Series.std() parameters "axis", "level", "numeric_only" unsupported') def test_series_std_unsupported_params(self): def pyfunc(series, axis, level, numeric_only): @@ -3972,7 +3972,7 @@ def test_series_nunique_param1_impl(S, dropna): [np.nan, np.nan, np.inf], ] - if hpat.config.config_pipeline_hpat_default: + if sdc.config.config_pipeline_hpat_default: """ SDC pipeline Series.nunique() does not support numpy.nan """ @@ -3988,7 +3988,7 @@ def test_series_nunique_param1_impl(S, dropna): result = hpat_func(S) self.assertEqual(result, result_ref) - if not hpat.config.config_pipeline_hpat_default: + if not sdc.config.config_pipeline_hpat_default: """ SDC pipeline does not support parameter to Series.nunique(dropna=True) """ @@ -4008,7 +4008,7 @@ def pyfunc(): cfunc = self.jit(pyfunc) np.testing.assert_equal(pyfunc(), cfunc()) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, 'Series.var() data [max_uint64, max_uint64] unsupported') def test_series_var_unboxing(self): def pyfunc(series): @@ -4019,7 +4019,7 @@ def pyfunc(series): series = pd.Series(data) np.testing.assert_equal(pyfunc(series), cfunc(series)) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, 'Series.var() parameters "ddof" and "skipna" unsupported') def test_series_var_full(self): def pyfunc(series, skipna, ddof): @@ -4034,7 +4034,7 @@ def pyfunc(series, skipna, ddof): result = cfunc(series, skipna=skipna, ddof=ddof) np.testing.assert_equal(ref_result, result) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, 'Series.var() strings as input data unsupported') def 
test_series_var_str(self): def pyfunc(series): @@ -4047,7 +4047,7 @@ def pyfunc(series): msg = 'Method var(). The object must be a number. Given self.data.dtype: {}' self.assertIn(msg.format(types.unicode_type), str(raises.exception)) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, 'Series.var() parameters "axis", "level", "numeric_only" unsupported') def test_series_var_unsupported_params(self): def pyfunc(series, axis, level, numeric_only): @@ -4097,7 +4097,7 @@ def test_series_count_impl(S): self.assertEqual(result, result_ref) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, 'Series.cumsum() np.nan as input data unsupported') def test_series_cumsum(self): def test_impl(): @@ -4108,7 +4108,7 @@ def test_impl(): cfunc = self.jit(pyfunc) pd.testing.assert_series_equal(pyfunc(), cfunc()) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, 'Series.cumsum() np.nan as input data unsupported') def test_series_cumsum_unboxing(self): def test_impl(s): @@ -4121,7 +4121,7 @@ def test_impl(s): series = pd.Series(data) pd.testing.assert_series_equal(pyfunc(series), cfunc(series)) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, 'Series.cumsum() parameters "axis", "skipna" unsupported') def test_series_cumsum_full(self): def test_impl(s, axis, skipna): @@ -4138,7 +4138,7 @@ def test_impl(s, axis, skipna): jit_result = cfunc(series, axis=axis, skipna=skipna) pd.testing.assert_series_equal(ref_result, jit_result) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, 'Series.cumsum() strings as input data unsupported') def test_series_cumsum_str(self): def test_impl(s): @@ -4151,7 +4151,7 @@ def test_impl(s): msg = 'Method cumsum(). 
The object must be a number. Given self.data.dtype: {}' self.assertIn(msg.format(types.unicode_type), str(raises.exception)) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, 'Series.cumsum() parameter "axis" unsupported') def test_series_cumsum_unsupported_axis(self): def test_impl(s, axis): @@ -4165,7 +4165,7 @@ def test_impl(s, axis): msg = 'Method cumsum(). Unsupported parameters. Given axis: int' self.assertIn(msg, str(raises.exception)) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, 'Series.cov() parameter "min_periods" unsupported') def test_series_cov(self): def test_series_cov_impl(S1, S2, min_periods=None): @@ -4193,7 +4193,7 @@ def test_series_cov_impl(S1, S2, min_periods=None): result = hpat_func(S1, S2, min_periods=period) np.testing.assert_allclose(result, result_ref) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, 'Series.cov() parameter "min_periods" unsupported') def test_series_cov_unsupported_dtype(self): def test_series_cov_impl(S1, S2, min_periods=None): @@ -4215,7 +4215,7 @@ def test_series_cov_impl(S1, S2, min_periods=None): msg = 'Method cov(). The object self.data' self.assertIn(msg, str(raises.exception)) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, 'Series.cov() parameter "min_periods" unsupported') def test_series_cov_unsupported_period(self): def test_series_cov_impl(S1, S2, min_periods=None): @@ -4235,13 +4235,13 @@ def test_series_cov_impl(S1, S2, min_periods=None): msg = 'Method cov(). 
The object min_periods' self.assertIn(msg, str(raises.exception)) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, 'Series.pct_change unsupported some Series') def test_series_pct_change(self): def test_series_pct_change_impl(S, periods, method): return S.pct_change(periods=periods, fill_method=method, limit=None, freq=None) - hpat_func = hpat.jit(test_series_pct_change_impl) + hpat_func = sdc.jit(test_series_pct_change_impl) test_input_data = [ [], [np.nan, np.nan, np.nan], @@ -4259,13 +4259,13 @@ def test_series_pct_change_impl(S, periods, method): result = hpat_func(S, periods, method) pd.testing.assert_series_equal(result, result_ref) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, 'Series.pct_change() strings as input data unsupported') def test_series_pct_change_str(self): def test_series_pct_change_impl(S): return S.pct_change(periods=1, fill_method='pad', limit=None, freq=None) - hpat_func = hpat.jit(test_series_pct_change_impl) + hpat_func = sdc.jit(test_series_pct_change_impl) S = pd.Series(test_global_input_data_unicode_kind4) with self.assertRaises(TypingError) as raises: @@ -4273,13 +4273,13 @@ def test_series_pct_change_impl(S): msg = 'Method pct_change(). 
The object self.data' self.assertIn(msg, str(raises.exception)) - @unittest.skipIf(hpat.config.config_pipeline_hpat_default, + @unittest.skipIf(sdc.config.config_pipeline_hpat_default, 'Series.pct_change() does not raise an exception') def test_series_pct_change_not_supported(self): def test_series_pct_change_impl(S, periods=1, fill_method='pad', limit=None, freq=None): return S.pct_change(periods=periods, fill_method=fill_method, limit=limit, freq=freq) - hpat_func = hpat.jit(test_series_pct_change_impl) + hpat_func = sdc.jit(test_series_pct_change_impl) S = pd.Series([0, 0, 0, np.nan, np.nan, 0, 0, np.nan, np.inf, 0, 0, np.inf, np.inf]) with self.assertRaises(ValueError) as raises: hpat_func(S, fill_method='ababa') diff --git a/hpat/tests/test_strings.py b/sdc/tests/test_strings.py similarity index 84% rename from hpat/tests/test_strings.py rename to sdc/tests/test_strings.py index f3bdbaeac..dc92d6b6f 100644 --- a/hpat/tests/test_strings.py +++ b/sdc/tests/test_strings.py @@ -29,16 +29,16 @@ import unittest import platform -import hpat +import sdc import numpy as np import pandas as pd import glob import gc import re import pyarrow.parquet as pq -from hpat.str_arr_ext import StringArray -from hpat.str_ext import unicode_to_std_str, std_str_to_unicode -from hpat.tests.gen_test_data import ParquetGenerator +from sdc.str_arr_ext import StringArray +from sdc.str_ext import unicode_to_std_str, std_str_to_unicode +from sdc.tests.gen_test_data import ParquetGenerator class TestStrings(unittest.TestCase): @@ -46,7 +46,7 @@ class TestStrings(unittest.TestCase): def test_pass_return(self): def test_impl(_str): return _str - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) # pass single string and return arg = 'test_str' @@ -59,7 +59,7 @@ def test_impl(_str): def test_const(self): def test_impl(): return 'test_str' - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) self.assertEqual(hpat_func(), test_impl()) def test_str2str(self): @@ -71,7 
+71,7 @@ def test_str2str(self): loc_vars = {} exec(func_text, {}, loc_vars) test_impl = loc_vars['test_impl'] - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) arg = ' \tbbCD\t ' self.assertEqual(hpat_func(arg), test_impl(arg)) @@ -81,20 +81,20 @@ def test_equality(self): def test_impl(_str): return (_str == 'test_str') - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) self.assertEqual(hpat_func(arg), test_impl(arg)) def test_impl(_str): return (_str != 'test_str') - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) self.assertEqual(hpat_func(arg), test_impl(arg)) def test_concat(self): def test_impl(_str): return (_str + 'test_str') - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) arg = 'a_' self.assertEqual(hpat_func(arg), test_impl(arg)) @@ -102,7 +102,7 @@ def test_impl(_str): def test_split(self): def test_impl(_str): return _str.split('/') - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) arg = 'aa/bb/cc' self.assertEqual(hpat_func(arg), test_impl(arg)) @@ -110,7 +110,7 @@ def test_impl(_str): def test_replace(self): def test_impl(_str): return _str.replace('/', ';') - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) arg = 'aa/bb/cc' self.assertEqual(hpat_func(arg), test_impl(arg)) @@ -118,7 +118,7 @@ def test_impl(_str): def test_getitem_int(self): def test_impl(_str): return _str[3] - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) arg = 'aa/bb/cc' self.assertEqual(hpat_func(arg), test_impl(arg)) @@ -126,7 +126,7 @@ def test_impl(_str): def test_string_int_cast(self): def test_impl(_str): return int(_str) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) arg = '12' self.assertEqual(hpat_func(arg), test_impl(arg)) @@ -134,7 +134,7 @@ def test_impl(_str): def test_string_float_cast(self): def test_impl(_str): return float(_str) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) arg = '12.2' 
self.assertEqual(hpat_func(arg), test_impl(arg)) @@ -142,7 +142,7 @@ def test_impl(_str): def test_string_str_cast(self): def test_impl(a): return str(a) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) for arg in [np.int32(45), 43, np.float32(1.4), 4.5]: py_res = test_impl(arg) @@ -154,16 +154,16 @@ def test_re_sub(self): def test_impl(_str): p = re.compile('ab*') return p.sub('ff', _str) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) arg = 'aabbcc' self.assertEqual(hpat_func(arg), test_impl(arg)) def test_regex_std(self): def test_impl(_str, _pat): - return hpat.str_ext.contains_regex( - _str, hpat.str_ext.compile_regex(_pat)) - hpat_func = hpat.jit(test_impl) + return sdc.str_ext.contains_regex( + _str, sdc.str_ext.compile_regex(_pat)) + hpat_func = sdc.jit(test_impl) self.assertEqual(hpat_func('What does the fox say', r'd.*(the |fox ){2}'), True) @@ -172,11 +172,11 @@ def test_impl(_str, _pat): def test_replace_regex_std(self): def test_impl(_str, pat, val): s = unicode_to_std_str(_str) - e = hpat.str_ext.compile_regex(unicode_to_std_str(pat)) + e = sdc.str_ext.compile_regex(unicode_to_std_str(pat)) val = unicode_to_std_str(val) - out = hpat.str_ext.str_replace_regex(s, e, val) + out = sdc.str_ext.str_replace_regex(s, e, val) return std_str_to_unicode(out) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) _str = 'What does the fox say' pat = r'd.*(the |fox ){2}' @@ -191,9 +191,9 @@ def test_impl(_str, pat, val): s = unicode_to_std_str(_str) e = unicode_to_std_str(pat) val = unicode_to_std_str(val) - out = hpat.str_ext.str_replace_noregex(s, e, val) + out = sdc.str_ext.str_replace_noregex(s, e, val) return std_str_to_unicode(out) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) _str = 'What does the fox say' pat = 'does the fox' @@ -208,7 +208,7 @@ def test_string_array_constructor(self): # create StringArray and return as list of strings def test_impl(): return StringArray(['ABC', 'BB', 
'CDEF']) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) self.assertTrue(np.array_equal(hpat_func(), ['ABC', 'BB', 'CDEF'])) @@ -217,7 +217,7 @@ def test_impl(): A = StringArray(['ABC', 'BB', 'CDEF']) B = A == 'ABC' return B.sum() - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) self.assertEqual(hpat_func(), 1) @@ -225,7 +225,7 @@ def test_string_series(self): def test_impl(ds): rs = ds == 'one' return ds, rs - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) df = pd.DataFrame( { @@ -244,7 +244,7 @@ def test_impl(): B = A == 'ABC' C = A[B] return len(C) == 1 and C[0] == 'ABC' - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) self.assertEqual(hpat_func(), True) @@ -255,7 +255,7 @@ def test_string_NA_box(self): def test_impl(): df = pq.read_table('example.parquet').to_pandas() return df.five - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) # XXX just checking isna() since Pandas uses None in this case # instead of nan for some reason @@ -265,7 +265,7 @@ def test_impl(): def test_decode_empty1(self): def test_impl(S): return S[0] - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) S = pd.Series(['']) self.assertEqual(hpat_func(S), test_impl(S)) @@ -273,7 +273,7 @@ def test_impl(S): def test_decode_single_ascii_char1(self): def test_impl(S): return S[0] - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) S = pd.Series(['A']) self.assertEqual(hpat_func(S), test_impl(S)) @@ -281,7 +281,7 @@ def test_impl(S): def test_decode_ascii1(self): def test_impl(S): return S[0] - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) S = pd.Series(['Abc12', 'bcd', '345']) self.assertEqual(hpat_func(S), test_impl(S)) @@ -289,7 +289,7 @@ def test_impl(S): def test_decode_unicode1(self): def test_impl(S): return S[0], S[1], S[2] - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) S = pd.Series(['¡Y tú quién te crees?', '🐍⚡', '大处着眼,小处着手。', ]) @@ 
-299,7 +299,7 @@ def test_decode_unicode2(self): # test strings that start with ascii def test_impl(S): return S[0], S[1], S[2] - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) S = pd.Series(['abc¡Y tú quién te crees?', 'dd2🐍⚡', '22 大处着眼,小处着手。', ]) @@ -309,7 +309,7 @@ def test_encode_unicode1(self): def test_impl(): return pd.Series(['¡Y tú quién te crees?', '🐍⚡', '大处着眼,小处着手。', ]) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) pd.testing.assert_series_equal(hpat_func(), test_impl()) @@ -317,7 +317,7 @@ def test_impl(): def test_box_np_arr_string(self): def test_impl(A): return A[0] - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) A = np.array(['AA', 'B']) self.assertEqual(hpat_func(A), test_impl(A)) @@ -326,27 +326,27 @@ def test_impl(A): def test_glob(self): def test_impl(): glob.glob("*py") - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) self.assertEqual(hpat_func(), test_impl()) def test_set_string(self): def test_impl(): - s = hpat.set_ext.init_set_string() + s = sdc.set_ext.init_set_string() s.add('ff') for v in s: pass return v - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) self.assertEqual(hpat_func(), test_impl()) def test_dict_string(self): def test_impl(): - s = hpat.dict_ext.dict_unicode_type_unicode_type_init() + s = sdc.dict_ext.dict_unicode_type_unicode_type_init() s['aa'] = 'bb' return s['aa'], ('aa' in s) - hpat_func = hpat.jit(test_impl) + hpat_func = sdc.jit(test_impl) self.assertEqual(hpat_func(), ('bb', True)) diff --git a/hpat/tests/test_utils.py b/sdc/tests/test_utils.py similarity index 70% rename from hpat/tests/test_utils.py rename to sdc/tests/test_utils.py index 5334cf960..31d8be1ae 100644 --- a/hpat/tests/test_utils.py +++ b/sdc/tests/test_utils.py @@ -25,7 +25,7 @@ # ***************************************************************************** -import hpat +import sdc import numba @@ -37,56 +37,56 @@ def count_array_REPs(): - from 
hpat.distributed import Distribution - vals = hpat.distributed.dist_analysis.array_dists.values() + from sdc.distributed import Distribution + vals = sdc.distributed.dist_analysis.array_dists.values() return sum([v == Distribution.REP for v in vals]) def count_parfor_REPs(): - from hpat.distributed import Distribution - vals = hpat.distributed.dist_analysis.parfor_dists.values() + from sdc.distributed import Distribution + vals = sdc.distributed.dist_analysis.parfor_dists.values() return sum([v == Distribution.REP for v in vals]) def count_parfor_OneDs(): - from hpat.distributed import Distribution - vals = hpat.distributed.dist_analysis.parfor_dists.values() + from sdc.distributed import Distribution + vals = sdc.distributed.dist_analysis.parfor_dists.values() return sum([v == Distribution.OneD for v in vals]) def count_array_OneDs(): - from hpat.distributed import Distribution - vals = hpat.distributed.dist_analysis.array_dists.values() + from sdc.distributed import Distribution + vals = sdc.distributed.dist_analysis.array_dists.values() return sum([v == Distribution.OneD for v in vals]) def count_parfor_OneD_Vars(): - from hpat.distributed import Distribution - vals = hpat.distributed.dist_analysis.parfor_dists.values() + from sdc.distributed import Distribution + vals = sdc.distributed.dist_analysis.parfor_dists.values() return sum([v == Distribution.OneD_Var for v in vals]) def count_array_OneD_Vars(): - from hpat.distributed import Distribution - vals = hpat.distributed.dist_analysis.array_dists.values() + from sdc.distributed import Distribution + vals = sdc.distributed.dist_analysis.array_dists.values() return sum([v == Distribution.OneD_Var for v in vals]) def dist_IR_contains(*args): - return sum([(s in hpat.distributed.fir_text) for s in args]) + return sum([(s in sdc.distributed.fir_text) for s in args]) -@hpat.jit +@sdc.jit def get_rank(): - return hpat.distributed_api.get_rank() + return sdc.distributed_api.get_rank() -@hpat.jit +@sdc.jit def 
get_start_end(n): - rank = hpat.distributed_api.get_rank() - n_pes = hpat.distributed_api.get_size() - start = hpat.distributed_api.get_start(n, n_pes, rank) - end = hpat.distributed_api.get_end(n, n_pes, rank) + rank = sdc.distributed_api.get_rank() + n_pes = sdc.distributed_api.get_size() + start = sdc.distributed_api.get_start(n, n_pes, rank) + end = sdc.distributed_api.get_end(n, n_pes, rank) return start, end diff --git a/sdc/tests/tests_perf/__init__.py b/sdc/tests/tests_perf/__init__.py new file mode 100644 index 000000000..c31b97f2c --- /dev/null +++ b/sdc/tests/tests_perf/__init__.py @@ -0,0 +1,2 @@ +from sdc.tests.tests_perf.test_perf_unicode import * +from sdc.tests.tests_perf.test_perf_series_str import * diff --git a/hpat/tests/tests_perf/test_perf_series_str.py b/sdc/tests/tests_perf/test_perf_series_str.py similarity index 98% rename from hpat/tests/tests_perf/test_perf_series_str.py rename to sdc/tests/tests_perf/test_perf_series_str.py index 02a15c99e..ac4d4afc2 100644 --- a/hpat/tests/tests_perf/test_perf_series_str.py +++ b/sdc/tests/tests_perf/test_perf_series_str.py @@ -33,8 +33,8 @@ import pandas as pd -from hpat.tests.test_utils import * -from hpat.tests.tests_perf.test_perf_utils import * +from sdc.tests.test_utils import * +from sdc.tests.tests_perf.test_perf_utils import * def usecase_series_len(input_data): @@ -112,7 +112,7 @@ def usecase_series_strip(input_data): @contextmanager def do_jit(f): """Context manager to jit function""" - cfunc = hpat.jit(f) + cfunc = sdc.jit(f) try: yield cfunc finally: @@ -180,7 +180,7 @@ def tearDownClass(cls): def _test_series_str(self, pyfunc, name, input_data=None): input_data = input_data or test_global_input_data_unicode_kind4 - hpat_func = hpat.jit(pyfunc) + hpat_func = sdc.jit(pyfunc) for data_length, data_width in itertools.product(self.total_data_length, self.width): data = perf_data_gen_fixed_len(input_data, data_width, data_length) test_data = pd.Series(data) diff --git 
a/hpat/tests/tests_perf/test_perf_unicode.py b/sdc/tests/tests_perf/test_perf_unicode.py similarity index 97% rename from hpat/tests/tests_perf/test_perf_unicode.py rename to sdc/tests/tests_perf/test_perf_unicode.py index bcf63c22b..417ee6323 100644 --- a/hpat/tests/tests_perf/test_perf_unicode.py +++ b/sdc/tests/tests_perf/test_perf_unicode.py @@ -30,8 +30,8 @@ import time import numba -from hpat.tests.test_utils import * -from hpat.tests.tests_perf.test_perf_utils import * +from sdc.tests.test_utils import * +from sdc.tests.tests_perf.test_perf_utils import * def usecase_split(input_data): diff --git a/hpat/tests/tests_perf/test_perf_utils.py b/sdc/tests/tests_perf/test_perf_utils.py similarity index 100% rename from hpat/tests/tests_perf/test_perf_utils.py rename to sdc/tests/tests_perf/test_perf_utils.py diff --git a/hpat/timsort.py b/sdc/timsort.py similarity index 99% rename from hpat/timsort.py rename to sdc/timsort.py index c2f66bbe3..79e2ebc62 100644 --- a/hpat/timsort.py +++ b/sdc/timsort.py @@ -29,7 +29,7 @@ import pandas as pd import numba from numba.extending import overload -from hpat.utils import empty_like_type, alloc_arr_tup +from sdc.utils import empty_like_type, alloc_arr_tup # ported from Spark to Numba-compilable Python # A port of the Android TimSort class, which utilizes a "stable, adaptive, iterative mergesort." 
diff --git a/hpat/transport/hpat_transport_mpi.cpp b/sdc/transport/hpat_transport_mpi.cpp similarity index 100% rename from hpat/transport/hpat_transport_mpi.cpp rename to sdc/transport/hpat_transport_mpi.cpp diff --git a/hpat/transport/hpat_transport_single_process.cpp b/sdc/transport/hpat_transport_single_process.cpp similarity index 100% rename from hpat/transport/hpat_transport_single_process.cpp rename to sdc/transport/hpat_transport_single_process.cpp diff --git a/hpat/utils.py b/sdc/utils.py similarity index 94% rename from hpat/utils.py rename to sdc/utils.py index 6d9f78bbb..188fe2a1a 100644 --- a/hpat/utils.py +++ b/sdc/utils.py @@ -41,9 +41,9 @@ from numba.extending import overload, intrinsic, lower_cast import collections import numpy as np -import hpat -from hpat.str_ext import string_type, list_string_array_type -from hpat.str_arr_ext import string_array_type, num_total_chars, pre_alloc_string_array +import sdc +from sdc.str_ext import string_type, list_string_array_type +from sdc.str_arr_ext import string_array_type, num_total_chars, pre_alloc_string_array from enum import Enum import types as pytypes @@ -83,7 +83,7 @@ class CTypeEnum(Enum): # silence Numba error messages for now -# TODO: customize through @hpat.jit +# TODO: customize through @sdc.jit numba.errors.error_extras = { 'unsupported_error': '', 'typing': '', @@ -214,9 +214,9 @@ def is_alloc_callname(func_name, mod_name): or (func_name == 'empty_inferred' and mod_name in ('numba.extending', 'numba.unsafe.ndarray')) or (func_name == 'pre_alloc_string_array' - and mod_name == 'hpat.str_arr_ext') + and mod_name == 'sdc.str_arr_ext') or (func_name in ('alloc_str_list', 'alloc_list_list_str') - and mod_name == 'hpat.str_ext')) + and mod_name == 'sdc.str_ext')) def find_build_tuple(func_ir, var): @@ -257,7 +257,7 @@ def generic(self, args, kws): @lower_builtin(cprint, types.VarArg(types.Any)) def cprint_lower(context, builder, sig, args): - from hpat.str_ext import string_type, char_type + from 
sdc.str_ext import string_type, char_type for i, val in enumerate(args): typ = sig.args[i] @@ -285,7 +285,7 @@ def cprint_lower(context, builder, sig, args): def print_dist(d): - from hpat.distributed_analysis import Distribution + from sdc.distributed_analysis import Distribution if d == Distribution.REP: return "REP" if d == Distribution.OneD: @@ -299,14 +299,14 @@ def print_dist(d): def distribution_report(): - import hpat.distributed - if hpat.distributed.dist_analysis is None: + import sdc.distributed + if sdc.distributed.dist_analysis is None: return print("Array distributions:") - for arr, dist in hpat.distributed.dist_analysis.array_dists.items(): + for arr, dist in sdc.distributed.dist_analysis.array_dists.items(): print(" {0:20} {1}".format(arr, print_dist(dist))) print("\nParfor distributions:") - for p, dist in hpat.distributed.dist_analysis.parfor_dists.items(): + for p, dist in sdc.distributed.dist_analysis.parfor_dists.items(): print(" {0:<20} {1}".format(p, print_dist(dist))) @@ -354,8 +354,8 @@ def is_array(typemap, varname): return (varname in typemap and (is_np_array(typemap, varname) or typemap[varname] in (string_array_type, list_string_array_type, - hpat.hiframes.split_impl.string_array_split_view_type) - or isinstance(typemap[varname], hpat.hiframes.pd_series_ext.SeriesType))) + sdc.hiframes.split_impl.string_array_split_view_type) + or isinstance(typemap[varname], sdc.hiframes.pd_series_ext.SeriesType))) def is_np_array(typemap, varname): @@ -369,7 +369,7 @@ def is_array_container(typemap, varname): and (isinstance(typemap[varname].dtype, types.Array) or typemap[varname].dtype == string_array_type or isinstance(typemap[varname].dtype, - hpat.hiframes.pd_series_ext.SeriesType))) + sdc.hiframes.pd_series_ext.SeriesType))) # converts an iterable to array, similar to np.array, but can support @@ -397,8 +397,8 @@ def empty_like_type(n, arr): @overload(empty_like_type) def empty_like_type_overload(n, arr): - if isinstance(arr, 
hpat.hiframes.pd_categorical_ext.CategoricalArray): - from hpat.hiframes.pd_categorical_ext import fix_cat_array_type + if isinstance(arr, sdc.hiframes.pd_categorical_ext.CategoricalArray): + from sdc.hiframes.pd_categorical_ext import fix_cat_array_type return lambda n, arr: fix_cat_array_type(np.empty(n, arr.dtype)) if isinstance(arr, types.Array): return lambda n, arr: np.empty(n, arr.dtype) diff --git a/setup.cfg b/setup.cfg index 8427b5747..0b5b15a7c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,10 +1,10 @@ [versioneer] VCS = git style = pep440 -versionfile_source = hpat/_version.py -versionfile_build = hpat/_version.py +versionfile_source = sdc/_version.py +versionfile_build = sdc/_version.py tag_prefix = -parentdir_prefix = hpat- +parentdir_prefix = sdc- [pycodestyle] max_line_length = 120 diff --git a/setup.py b/setup.py index 58f091ba0..2f8fd1894 100644 --- a/setup.py +++ b/setup.py @@ -163,11 +163,11 @@ def readme(): boost_libs = ['boost_filesystem', 'boost_system'] io_libs += boost_libs -ext_io = Extension(name="hpat.hio", - sources=["hpat/io/_io.cpp", "hpat/io/_csv.cpp"], - depends=["hpat/_hpat_common.h", "hpat/_distributed.h", - "hpat/_import_py.h", "hpat/io/_csv.h", - "hpat/_datetime_ext.h"], +ext_io = Extension(name="sdc.hio", + sources=["sdc/io/_io.cpp", "sdc/io/_csv.cpp"], + depends=["sdc/_hpat_common.h", "sdc/_distributed.h", + "sdc/_import_py.h", "sdc/io/_csv.h", + "sdc/_datetime_ext.h"], libraries=boost_libs, include_dirs=ind + np_compile_args['include_dirs'], library_dirs=lid, @@ -177,9 +177,9 @@ def readme(): language="c++" ) -ext_transport_mpi = Extension(name="hpat.transport_mpi", - sources=["hpat/transport/hpat_transport_mpi.cpp"], - depends=["hpat/_distributed.h"], +ext_transport_mpi = Extension(name="sdc.transport_mpi", + sources=["sdc/transport/hpat_transport_mpi.cpp"], + depends=["sdc/_distributed.h"], libraries=io_libs, include_dirs=ind, library_dirs=lid, @@ -188,9 +188,9 @@ def readme(): language="c++" ) -ext_transport_seq = 
Extension(name="hpat.transport_seq", - sources=["hpat/transport/hpat_transport_single_process.cpp"], - depends=["hpat/_distributed.h"], +ext_transport_seq = Extension(name="sdc.transport_seq", + sources=["sdc/transport/hpat_transport_single_process.cpp"], + depends=["sdc/_distributed.h"], include_dirs=ind, library_dirs=lid, extra_compile_args=eca, @@ -198,8 +198,8 @@ def readme(): language="c++" ) -ext_hdf5 = Extension(name="hpat.io._hdf5", - sources=["hpat/io/_hdf5.cpp"], +ext_hdf5 = Extension(name="sdc.io._hdf5", + sources=["sdc/io/_hdf5.cpp"], depends=[], libraries=hdf5_libs, include_dirs=[HDF5_DIR + '/include', ] + ind, @@ -210,18 +210,18 @@ def readme(): language="c++" ) -ext_hdist = Extension(name="hpat.hdist", - sources=["hpat/_distributed.cpp"], - depends=["hpat/_hpat_common.h"], +ext_hdist = Extension(name="sdc.hdist", + sources=["sdc/_distributed.cpp"], + depends=["sdc/_hpat_common.h"], extra_compile_args=eca, extra_link_args=ela, include_dirs=ind, library_dirs=lid, ) -ext_chiframes = Extension(name="hpat.chiframes", - sources=["hpat/_hiframes.cpp"], - depends=["hpat/_hpat_sort.h"], +ext_chiframes = Extension(name="sdc.chiframes", + sources=["sdc/_hiframes.cpp"], + depends=["sdc/_hpat_sort.h"], extra_compile_args=eca, extra_link_args=ela, include_dirs=ind, @@ -229,16 +229,16 @@ def readme(): ) -ext_dict = Extension(name="hpat.hdict_ext", - sources=["hpat/_dict_ext.cpp"], +ext_dict = Extension(name="sdc.hdict_ext", + sources=["sdc/_dict_ext.cpp"], extra_compile_args=eca, extra_link_args=ela, include_dirs=ind, library_dirs=lid, ) -ext_set = Extension(name="hpat.hset_ext", - sources=["hpat/_set_ext.cpp"], +ext_set = Extension(name="sdc.hset_ext", + sources=["sdc/_set_ext.cpp"], extra_compile_args=eca, extra_link_args=ela, include_dirs=ind, @@ -250,8 +250,8 @@ def readme(): if not is_win: str_libs += ['boost_regex'] -ext_str = Extension(name="hpat.hstr_ext", - sources=["hpat/_str_ext.cpp"], +ext_str = Extension(name="sdc.hstr_ext", + 
sources=["sdc/_str_ext.cpp"], libraries=str_libs, define_macros=np_compile_args['define_macros'] + [('USE_BOOST_REGEX', None)], extra_compile_args=eca, @@ -265,8 +265,8 @@ def readme(): #dt_args['library_dirs'] = dt_args['library_dirs'] + [PANDAS_DIR+'/_libs/tslibs'] #dt_args['libraries'] = dt_args['libraries'] + ['np_datetime'] -ext_dt = Extension(name="hpat.hdatetime_ext", - sources=["hpat/_datetime_ext.cpp"], +ext_dt = Extension(name="sdc.hdatetime_ext", + sources=["sdc/_datetime_ext.cpp"], libraries=np_compile_args['libraries'], define_macros=np_compile_args['define_macros'], extra_compile_args=['-std=c++11'], @@ -289,8 +289,8 @@ def readme(): # # seperate parquet reader used due to ABI incompatibility of arrow # pq_libs += ['hpat_parquet_reader'] -ext_parquet = Extension(name="hpat.parquet_cpp", - sources=["hpat/io/_parquet.cpp"], +ext_parquet = Extension(name="sdc.parquet_cpp", + sources=["sdc/io/_parquet.cpp"], libraries=pq_libs, include_dirs=['.'] + ind, define_macros=[('BUILTIN_PARQUET_READER', None)], @@ -299,14 +299,14 @@ def readme(): library_dirs=lid, ) -# ext_daal_wrapper = Extension(name="hpat.daal_wrapper", +# ext_daal_wrapper = Extension(name="sdc.daal_wrapper", # include_dirs = [DAALROOT+'/include'], # libraries = ['daal_core', 'daal_thread']+MPI_LIBS, -# sources=["hpat/_daal.cpp"] +# sources=["sdc/_daal.cpp"] # ) -ext_ros = Extension(name="hpat.ros_cpp", - sources=["hpat/_ros.cpp"], +ext_ros = Extension(name="sdc.ros_cpp", + sources=["sdc/_ros.cpp"], include_dirs=['/opt/ros/lunar/include', '/opt/ros/lunar/include/xmlrpcpp', PREFIX_DIR + '/include/', @@ -344,8 +344,8 @@ def readme(): if is_win: cv_libs = [l + '331' for l in cv_libs] -ext_cv_wrapper = Extension(name="hpat.cv_wrapper", - sources=["hpat/_cv.cpp"], +ext_cv_wrapper = Extension(name="sdc.cv_wrapper", + sources=["sdc/_cv.cpp"], include_dirs=[OPENCV_DIR + '/include'] + ind, library_dirs=[os.path.join(OPENCV_DIR, 'lib')] + lid, libraries=cv_libs, @@ -353,8 +353,8 @@ def readme(): 
language="c++", ) -ext_xenon_wrapper = Extension(name="hpat.hxe_ext", - sources=["hpat/io/_xe_wrapper.cpp"], +ext_xenon_wrapper = Extension(name="sdc.hxe_ext", + sources=["sdc/io/_xe_wrapper.cpp"], #include_dirs = ['/usr/include'], include_dirs=['.'] + ind, library_dirs=['.'] + lid, @@ -383,9 +383,9 @@ def readme(): # # These commands extends standart setuptools build procedure # -hpat_build_commands = versioneer.get_cmdclass() -hpat_build_commands['build_doc'] = build_doc -hpat_build_commands['build_devdoc'] = build_devdoc +sdc_build_commands = versioneer.get_cmdclass() +sdc_build_commands['build_doc'] = build_doc +sdc_build_commands['build_devdoc'] = build_devdoc class style(Command): @@ -505,9 +505,9 @@ def run(self): print("%s Style check passed" % self._result_marker) -hpat_build_commands.update({'style': style}) +sdc_build_commands.update({'style': style}) -setup(name='hpat', +setup(name='sdc', version=versioneer.get_version(), description='compiling Python code for clusters', long_description=readme(), @@ -521,17 +521,17 @@ def run(self): "Topic :: System :: Distributed Computing", ], keywords='data analytics cluster', - url='https://github.com/IntelPython/hpat', + url='https://github.com/IntelPython/sdc', author='Intel', packages=find_packages(), - package_data={'hpat.tests': ['*.bz2'], }, + package_data={'sdc.tests': ['*.bz2'], }, install_requires=['numba'], extras_require={'HDF5': ["h5py"], 'Parquet': ["pyarrow"]}, - cmdclass=hpat_build_commands, + cmdclass=sdc_build_commands, ext_modules=_ext_mods, entry_points={ "numba_extensions": [ - "init = hpat:_init_extension", + "init = sdc:_init_extension", ], }, ) diff --git a/tests_perf/build_hpat.py b/tests_perf/build_hpat.py index 59340091c..e2787246f 100644 --- a/tests_perf/build_hpat.py +++ b/tests_perf/build_hpat.py @@ -28,7 +28,7 @@ """ Build HPAT from source Usage: -python build_hpat.py --env-dir --build-dir +python build_sdc.py --env-dir --build-dir """ import argparse import logging diff --git 
a/tests_perf/runner.py b/tests_perf/runner.py index 0a3845708..890b162ba 100644 --- a/tests_perf/runner.py +++ b/tests_perf/runner.py @@ -45,7 +45,7 @@ def setup(self, size, nchars, implementation): self.series = StringSeriesGenerator(size=size, nchars=nchars).generate() @staticmethod - @hpat.jit + @sdc.jit def _len(series): return series.str.len() diff --git a/tests_perf/tests/algorithms.py b/tests_perf/tests/algorithms.py index 2eca8493c..9c32ca4d3 100644 --- a/tests_perf/tests/algorithms.py +++ b/tests_perf/tests/algorithms.py @@ -25,7 +25,7 @@ # ***************************************************************************** -import hpat +import sdc from .common import Implementation as Impl from .data_generator import DataGenerator, FloatSeriesGenerator @@ -49,7 +49,7 @@ def setup(self, quantile, interpolation, dtype, implementation): self.idx = data[dtype] @staticmethod - @hpat.jit + @sdc.jit def _quantile(idx, quantile, interpolation): return idx.quantile(quantile, interpolation=interpolation) @@ -71,7 +71,7 @@ def setup(self, size, implementation): self.series = FloatSeriesGenerator(size=size).generate() @staticmethod - @hpat.jit + @sdc.jit def _abs(series): return series.abs() @@ -94,7 +94,7 @@ def setup(self, size, implementation): self.series = FloatSeriesGenerator(size).generate() @staticmethod - @hpat.jit + @sdc.jit def _value_counts(series): return series.value_counts() @@ -117,7 +117,7 @@ def setup(self, size, implementation): self.series = FloatSeriesGenerator(size=size).generate() @staticmethod - @hpat.jit + @sdc.jit def _min(series): return series.min() @@ -129,7 +129,7 @@ def time_min(self, size, implementation): return self.series.min() @staticmethod - @hpat.jit + @sdc.jit def _max(series): return series.max() @@ -153,7 +153,7 @@ def setup(self, size, implementation): self.series2 = FloatSeriesGenerator(size).generate() @staticmethod - @hpat.jit + @sdc.jit def _cov(series, series2): return series.cov(series2) @@ -165,7 +165,7 @@ def time_cov(self, 
size, implementation): return self.series.cov(self.series2) @staticmethod - @hpat.jit + @sdc.jit def _corr(series, series2): return series.corr(series2) @@ -188,7 +188,7 @@ def setup(self, size, implementation): self.series = FloatSeriesGenerator(size=size).generate() @staticmethod - @hpat.jit + @sdc.jit def _sum(series): return series.sum() @@ -211,7 +211,7 @@ def setup(self, size, implementation): self.series = FloatSeriesGenerator(size).generate() @staticmethod - @hpat.jit + @sdc.jit def _count(series): return series.count() @@ -234,7 +234,7 @@ def setup(self, size, implementation): self.series = FloatSeriesGenerator(size).generate() @staticmethod - @hpat.jit + @sdc.jit def _nlargest(series): return series.nlargest() @@ -256,7 +256,7 @@ def setup(self, size, implementation): self.series = FloatSeriesGenerator(size).generate() @staticmethod - @hpat.jit + @sdc.jit def _nsmallest(series): return series.nsmallest() @@ -278,7 +278,7 @@ def setup(self, size, implementation): self.series = FloatSeriesGenerator(size).generate() @staticmethod - @hpat.jit + @sdc.jit def _var(series): return series.var() @@ -300,7 +300,7 @@ def setup(self, size, implementation): self.series = FloatSeriesGenerator(size).generate() @staticmethod - @hpat.jit + @sdc.jit def _mean(series): return series.mean() @@ -322,7 +322,7 @@ def setup(self, size, implementation): self.series = FloatSeriesGenerator(size).generate() @staticmethod - @hpat.jit + @sdc.jit def _median(series): return series.median() diff --git a/tests_perf/tests/io/csv.py b/tests_perf/tests/io/csv.py index 84d2a60bc..dc0b2e3fd 100644 --- a/tests_perf/tests/io/csv.py +++ b/tests_perf/tests/io/csv.py @@ -25,7 +25,7 @@ # ***************************************************************************** -import hpat +import sdc from ..common import BaseIO, Implementation as Impl from ..data_generator import DataGenerator @@ -44,7 +44,7 @@ def setup(self, implementation): self.df = data_generator.make_numeric_dataframe(5 * N) 
@staticmethod - @hpat.jit + @sdc.jit def _to_csv(df, fname): return df.to_csv(fname) diff --git a/tests_perf/tests/series.py b/tests_perf/tests/series.py index 47a34cc2c..3c64bc08f 100644 --- a/tests_perf/tests/series.py +++ b/tests_perf/tests/series.py @@ -25,7 +25,7 @@ # ***************************************************************************** -import hpat +import sdc from .common import Implementation as Impl from .data_generator import StringSeriesGenerator, WhiteSpaceStringSeriesGenerator @@ -44,7 +44,7 @@ def setup(self, size, nchars, implementation): self.series = StringSeriesGenerator(size=size, nchars=nchars).generate() @staticmethod - @hpat.jit + @sdc.jit def _len(series): return series.str.len() @@ -56,7 +56,7 @@ def time_len(self, size, nchars, implementation): return self.series.str.len() @staticmethod - @hpat.jit + @sdc.jit def _capitalize(series): return series.str.capitalize() @@ -68,7 +68,7 @@ def time_capitalize(self, size, nchars, implementation): return self.series.str.capitalize() @staticmethod - @hpat.jit + @sdc.jit def _lower(series): return series.str.lower() @@ -80,7 +80,7 @@ def time_lower(self, size, nchars, implementation): return self.series.str.lower() @staticmethod - @hpat.jit + @sdc.jit def _swapcase(series): return series.str.swapcase() @@ -92,7 +92,7 @@ def time_swapcase(self, size, nchars, implementation): return self.series.str.swapcase() @staticmethod - @hpat.jit + @sdc.jit def _title(series): return series.str.title() @@ -104,7 +104,7 @@ def time_title(self, size, nchars, implementation): return self.series.str.title() @staticmethod - @hpat.jit + @sdc.jit def _upper(series): return series.str.upper() @@ -128,7 +128,7 @@ def setup(self, size, nchars, implementation): self.series = WhiteSpaceStringSeriesGenerator(size=size, nchars=nchars).generate() @staticmethod - @hpat.jit + @sdc.jit def _lstrip(series): return series.str.lstrip() @@ -140,7 +140,7 @@ def time_lstrip(self, size, nchars, implementation): return 
self.series.str.lstrip() @staticmethod - @hpat.jit + @sdc.jit def _rstrip(series): return series.str.rstrip() @@ -152,7 +152,7 @@ def time_rstrip(self, size, nchars, implementation): return self.series.str.rstrip() @staticmethod - @hpat.jit + @sdc.jit def _strip(series): return series.str.strip() @@ -175,7 +175,7 @@ def setup(self, size, implementation): self.series = FloatSeriesGenerator(size=size).generate() @staticmethod - @hpat.jit + @sdc.jit def _sort_values(series): return series.sort_values() @@ -197,7 +197,7 @@ def setup(self, size, implementation): self.series = FloatSeriesIndexGenerator(size=size).generate() @staticmethod - @hpat.jit + @sdc.jit def _idxmax(series): return series.idxmax() @@ -208,7 +208,7 @@ def time_idxmax(self, size, implementation): return self.series.idxmax() @staticmethod - @hpat.jit + @sdc.jit def _idxmin(series): return series.idxmin() diff --git a/tutorial/intraday_mean.py b/tutorial/intraday_mean.py index 9c7835fb2..097bfecd7 100644 --- a/tutorial/intraday_mean.py +++ b/tutorial/intraday_mean.py @@ -29,16 +29,16 @@ import numpy as np import h5py import time -import hpat -from hpat import prange +import sdc +from sdc import prange # adopted from: # http://www.pythonforfinance.net/2017/02/20/intraday-stock-mean-reversion-trading-backtest-in-python/ -@hpat.jit(locals={'s_open': hpat.float64[:], 's_high': hpat.float64[:], - 's_low': hpat.float64[:], 's_close': hpat.float64[:], - 's_vol': hpat.float64[:]}) +@sdc.jit(locals={'s_open': sdc.float64[:], 's_high': sdc.float64[:], + 's_low': sdc.float64[:], 's_close': sdc.float64[:], + 's_vol': sdc.float64[:]}) def intraday_mean_revert(): file_name = "stock_data_all_google.hdf5" f = h5py.File(file_name, "r")