Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.

Commit c24e6e5

Browse files
samaidshssf
authored and committed
[SDC] Pandas methods parser for API Reference generation (#295)
1 parent 9bc453b commit c24e6e5

1 file changed

Lines changed: 214 additions & 47 deletions

File tree

docs/source/pandas_info.py

Lines changed: 214 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,50 +1,217 @@
1-
from inspect import getmembers, ismodule, isfunction, isclass
1+
# -*- coding: utf-8 -*-
2+
# *****************************************************************************
3+
# Copyright (c) 2019, Intel Corporation All rights reserved.
4+
#
5+
# Redistribution and use in source and binary forms, with or without
6+
# modification, are permitted provided that the following conditions are met:
7+
#
8+
# Redistributions of source code must retain the above copyright notice,
9+
# this list of conditions and the following disclaimer.
10+
#
11+
# Redistributions in binary form must reproduce the above copyright notice,
12+
# this list of conditions and the following disclaimer in the documentation
13+
# and/or other materials provided with the distribution.
14+
#
15+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
17+
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18+
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
19+
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20+
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21+
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
22+
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23+
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
24+
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
25+
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26+
# *****************************************************************************
27+
28+
29+
from inspect import getmembers, ismodule, isclass
230
import pandas
31+
import logging
32+
from datetime import datetime
33+
34+
# -- Debug logging --------------------------------------------------------------------------------------------------
35+
# Master switch: when true, the script entry point configures file logging.
# NOTE: the identifier spelling ("dubug") is kept as-is because other parts of the file refer to it.
enable_dubug_logging = True
# Per-category switches consulted by _module_logging / _class_logging / _method_logging.
enable_module_logging = True
enable_class_logging = False
enable_method_logging = False
# File that receives the debug log.
log_file_name = '../build/pandas_info.log'

# Help texts for the command-line options of the script entry point.
HELP_STR_LOG = 'True/False to enable/disable logging (True by default)'
HELP_STR_MODULE_LOG = 'True/False to enable/disable logging of Pandas submodules (True by default)'
HELP_STR_CLASS_LOG = 'True/False to enable/disable logging of classes (True by default)'
HELP_STR_METHOD_LOG = 'True/False to enable/disable logging of methods (True by default)'


# -- Submodules, classes, and methods to be excluded from API Reference ---------------------------------------------
# All three listed submodules are PRIVATE Pandas submodules.
exclude_modules = ['pandas.core', 'pandas.compat', 'pandas.util']

# Full names of classes to leave out (none so far).
exclude_classes = []

# Names of methods to leave out (none so far).
exclude_methods = []
59+
60+
61+
def _method_logging(s):
    """Write ``s`` to the debug log with a ``[METHOD]`` prefix if method logging is enabled.

    :param s: message text (a string; it is concatenated to the prefix).
    """
    if not enable_method_logging:
        return
    logging.debug('[METHOD]' + s)
65+
66+
67+
def _class_logging(s):
    """Write ``s`` to the debug log with a ``[CLASS]`` prefix if class logging is enabled.

    :param s: message text (a string; it is concatenated to the prefix).
    """
    if not enable_class_logging:
        return
    logging.debug('[CLASS]' + s)
71+
72+
73+
def _module_logging(s):
    """Write ``s`` to the debug log with a ``[MODULE]`` prefix if module logging is enabled.

    :param s: message text (a string; it is concatenated to the prefix).
    """
    if not enable_module_logging:
        return
    logging.debug('[MODULE]' + s)
77+
78+
79+
# -- Collects all classes and respective methods of the module and of its submodules --------------------------------
def get_submodules_of(module, inspected, module_dict):
    """Recursively collect the public classes of ``module`` and the public methods of each class.

    Nothing is returned; the results are accumulated in the arguments.

    :param module: module object to inspect. It is ignored when it has already been inspected, is neither
        ``pandas`` nor one of its submodules, has a dotted name component starting with ``_``, or is
        covered by ``exclude_modules``.
    :param inspected: set of module objects that have already been traversed; ``module`` is added to it.
    :param module_dict: dictionary updated in place. ``module_dict[module.__name__]`` is set to a list that
        holds one single-entry dictionary ``{class_name: [method_name, ...]}`` per class that is not
        skipped. ``class_name`` is the attribute name under which the class is found in the module.
    """

    # True if mod_name names the package itself or something nested inside it.
    # A bare prefix test would also match unrelated siblings such as `pandas.corefoo`.
    def _is_in_package(mod_name, package):
        return mod_name == package or mod_name.startswith(package + '.')

    # Returns True if the mod module will not be included in API Reference
    def _skip_module(mod):
        mod_name = mod.__name__  # Get new submodule name

        if mod in inspected:  # Ignore already traversed modules
            _module_logging('`' + mod_name + '` already traversed. Ignoring')
            return True

        if not _is_in_package(mod_name, 'pandas'):  # Traverse Pandas submodules only
            _module_logging('`' + mod_name + '` does not start with pandas. Ignoring')
            return True

        if '._' in mod_name or mod_name.startswith('_'):  # Ignore internal module
            _module_logging('`' + mod_name + '` is internal (starts with _). Ignoring')
            return True

        # Ignore submodules in the exclude list (the listed module and everything below it)
        if any(_is_in_package(mod_name, excl_module) for excl_module in exclude_modules):
            _module_logging('`' + mod_name + '` is in the exclusion list. Ignoring')
            return True

        return False

    # Full dotted name of the class, built from its attributes instead of slicing repr(cls),
    # which yields garbage for classes whose metaclass customizes __repr__.
    def _full_class_name(cls):
        owner = getattr(cls, '__module__', None)
        if not isinstance(owner, str) or owner == 'builtins':
            return cls.__qualname__  # Same spelling the default class repr uses for builtins
        return owner + '.' + cls.__qualname__

    # Returns True if the cls class will not be included in API Reference
    def _skip_class(cls):
        class_name = _full_class_name(cls)

        if '._' in class_name:  # We are interested only in public classes
            _class_logging('`' + class_name + '` is internal. Ignoring')
            return True

        if class_name in exclude_classes:  # Ignore classes in the exclude list
            _class_logging('`' + class_name + '` is in the exclusion list. Ignoring')
            return True

        return False

    # Returns True if the method method_name will not be included in API Reference
    def _skip_method(method_name):
        if method_name.startswith('_'):  # Ignore internal methods
            _method_logging('`' + method_name + '` is internal (starts with _). Ignoring')
            return True

        return False

    # Returns the names of the callable, non-internal attributes of the cls class
    def _generate_class_methods(cls):
        # getattr() with a default: dir() may list names whose lookup raises AttributeError
        meths = [func for func in dir(cls)
                 if callable(getattr(cls, func, None)) and not _skip_method(func)]
        for meth in meths:
            _method_logging('Adding method `' + meth + '` to the list')
        return meths

    # -- get_submodules_of() implementation begins
    if _skip_module(module):
        return

    inspected.add(module)  # Mark the module as traversed before descending into it
    module_name = module.__name__
    module_dict[module_name] = []

    _module_logging('********************** Inspecting module `' + module_name + '`')

    # Traverses the module classes and submodules
    for (name, obj) in getmembers(module):  # Iterate through members of the module
        if isclass(obj):  # We are interested in members, which are classes
            if not _skip_class(obj):
                _class_logging('********************** Inspecting class `' + name + '`')
                methods = _generate_class_methods(obj)  # Inspect methods of the class of interest only
                # One fresh dictionary per class. Appending a single shared dictionary on every
                # iteration would list every class as many times as there are classes.
                module_dict[module_name].append({name: methods})

        if ismodule(obj):
            # The recursive call performs the skip check (and its logging) itself
            get_submodules_of(obj, inspected, module_dict)
170+
171+
172+
if __name__ == "__main__":
    import argparse
    import os

    # Converts a command-line value to bool.
    # argparse's type=bool is not usable here: bool('False') is True because any non-empty string is
    # truthy, so logging could never be disabled from the command line.
    def _str2bool(value):
        if isinstance(value, bool):
            return value
        text = str(value).strip().lower()
        if text in ('true', 't', 'yes', 'y', '1', 'on'):
            return True
        if text in ('false', 'f', 'no', 'n', '0', 'off', ''):  # '' stays False, as with bool('')
            return False
        raise argparse.ArgumentTypeError('expected True or False, got %r' % (value,))

    # Argument parser
    parser = argparse.ArgumentParser(description='Pandas classes-methods generator')
    parser.add_argument('--log', default=True, help=HELP_STR_LOG, type=_str2bool)
    parser.add_argument('--module_log', default=True, help=HELP_STR_MODULE_LOG, type=_str2bool)
    parser.add_argument('--class_log', default=True, help=HELP_STR_CLASS_LOG, type=_str2bool)
    parser.add_argument('--method_log', default=True, help=HELP_STR_METHOD_LOG, type=_str2bool)

    args = parser.parse_args()
    # The command-line values override the module-level logging switches
    enable_dubug_logging = args.log
    enable_method_logging = args.method_log
    enable_class_logging = args.class_log
    enable_module_logging = args.module_log

    # Initialize logging
    if enable_dubug_logging:
        # basicConfig() opens the log file and fails if its directory does not exist yet
        log_dir = os.path.dirname(log_file_name)
        if log_dir:
            os.makedirs(log_dir, exist_ok=True)
        logging.basicConfig(filename=log_file_name, level=logging.DEBUG)
        logging.debug('****************** STARTING THE LOG *************************')
        logging.debug(datetime.now().strftime("%d/%m/%Y %H:%M:%S"))
3193

4-
"""
5-
def get_class_methods(the_class, class_only=False, instance_only=False, exclude_internal=True):
6-
7-
# Include methods of the_class
8-
def is_included(tup):
9-
is_method = isfunction(tup[1])
10-
if is_method:
11-
bound_to = tup[1].im_self
12-
print(bound_to)
13-
internal = tup[1].im_func.func_name[:2] == '__' and tup[1].im_func.func_name[-2:] == '__'
14-
if internal and exclude_internal:
15-
include = False
16-
else:
17-
include = (bound_to == the_class and not instance_only) or (bound_to == None and not class_only)
18-
else:
19-
include = False
20-
return include
21-
return filter(is_included, getmembers(the_class))
22-
"""
23-
24-
def parse_submodules(module):
25-
26-
methods_list = []
27-
28-
for obj in getmembers(module):
29-
if isclass(obj[1]):
30-
print(obj)
31-
methods_list.append((obj, get_class_methods(obj)))
32-
33-
if ismodule(obj[1]):
34-
print(obj)
35-
methods_list = methods_list + parse_submodules(obj)
36-
37-
return methods_list
38-
39-
40-
#lst = parse_submodules(pandas)
41-
#for item in lst:
42-
# print('Class:', item[0], 'Method:', item[1])
43-
44-
for obj in getmembers(pandas):
45-
if isclass(obj[1]):
46-
print(obj)
47-
methods = dir(obj)
48-
for item in methods:
49-
print(obj, '.', item)
194+
# Execute parser for Pandas
# Here is the structure of the data being filled in by get_submodules_of():
#     module_classes = dict()             # maps a module name to the list of its class entries
#     module_classes['md1'] = [cls_entry, ...]
#     module_classes['md2'] = [cls_entry, ...]
# where every cls_entry is itself a dictionary mapping class names to lists of method names:
#     cls_entry['c1'] = ['m1', 'm2', 'm3']  # methods within class c1
if __name__ == "__main__":
    inspected_modules = set()
    module_classes = dict()
    get_submodules_of(pandas, inspected_modules, module_classes)

    # Print the collected hierarchy: module, then its classes, then the methods of each class
    for the_mod in module_classes:
        print(the_mod)
        for the_cls in module_classes[the_mod]:
            for the_cls_name in the_cls:
                print(' *', the_cls_name)
                for the_method in the_cls[the_cls_name]:
                    print(' -', the_method)

0 commit comments

Comments
 (0)