Commit ca168fdd authored by Sarah Abrishami

added rulemall wheel and changed structure

parent 1424e9a4
from .rules import *
from rulemall.utils import timing
from abc import ABCMeta, abstractmethod
class RulesGroup:
    """An ordered collection of rules applied one after another to a dataset.

    Attributes:
        data: The object handed to each rule's ``validate`` and replaced by
            whatever that call returns.
        by: Stored as given; no method of this class reads it.
        rules: List of rule objects. Each is expected to expose ``name`` and
            ``validate(data)``.
    """

    def __init__(self, data, rules=None, by=None):
        """Store the dataset and the initial rules.

        Args:
            data: Object to validate.
            rules: Optional list of rule objects. When omitted, each group
                gets its own fresh list; a literal ``[]`` default would be
                shared by every instance created without ``rules``.
            by: Stored on the instance as-is.
        """
        self.data = data
        self.by = by
        self.rules = [] if rules is None else rules

    def __add__(self, rule):
        """Extend the group in place with an iterable of rules.

        Args:
            rule: Iterable of rule objects appended to ``self.rules``.

        Returns:
            The group itself, so ``group + [...]`` yields a usable object
            rather than ``None``.
        """
        self.rules += rule
        return self

    def validate(self):
        """Run every rule in order, feeding each the previous rule's output.

        Returns:
            The final ``data`` after all rules were applied.
        """
        for rule in self.rules:
            self.data = rule.validate(self.data)
        return self.data

    def get_rule_names(self):
        """Return the ``name`` of every rule, in application order."""
        return [rule.name for rule in self.rules]

    # TODO: complete from config method
    # TODO: Get rule applicator from factory of adaptor
    def remove_rule(self, name):
        """Drop every rule whose ``name`` equals ``name``."""
        self.rules = [rule for rule in self.rules if rule.name != name]

    def add_rule(self, rule):
        """Append a single rule.

        Raises:
            TypeError: If ``rule`` is not an instance of ``Rule``.
        """
        if not isinstance(rule, Rule):
            raise TypeError('rule must be from class Rule')
        self.rules.append(rule)

    def get_rule(self, name):
        """Look up rules by name.

        Returns:
            ``None`` when nothing matches, the rule itself when exactly one
            matches, or a list of rules when several share the name.
        """
        matches = [rule for rule in self.rules if rule.name == name]
        if not matches:
            return None
        if len(matches) > 1:
            return matches
        return matches[0]
class Rule:
    """Bind a validator to a target and write its result under ``name``.

    Attributes:
        name: Key under which the result is stored in the validated object.
        target: Key read from the validated object when ``orient`` is
            ``'column'``.
        validator: Object exposing ``validate(...)``.
        orient: ``'column'`` passes ``s[target]`` to the validator,
            ``'record'`` passes ``s`` itself.
        invert: When true the validator's result is stored unchanged; when
            false it is stored negated with ``~``.
    """

    def __init__(self, name, target, validator, invert=False, orient='column'):
        self.name = name
        self.target = target
        self.validator = validator
        self.orient = orient
        self.invert = invert

    def validate(self, s):
        """Apply the validator to ``s`` and store the outcome in ``s[name]``.

        Any ``orient`` other than ``'column'`` or ``'record'`` leaves ``s``
        untouched.

        Returns:
            ``s``, mutated in place when the orientation is recognised.
        """
        if self.orient == 'column':
            outcome = self.validator.validate(s[self.target])
        elif self.orient == 'record':
            outcome = self.validator.validate(s)
        else:
            return s
        # NOTE(review): the non-inverted path negates the validator result, so
        # the stored value presumably flags violations -- confirm intent.
        s[self.name] = outcome if self.invert else ~outcome
        return s
from functools import wraps
from time import time
def timing(f):
    """Decorator that prints how long each call to ``f`` took.

    The wrapped function's return value is passed through unchanged. If
    ``f`` raises, the exception propagates and nothing is printed.

    Args:
        f: Callable to time.

    Returns:
        A wrapper with ``f``'s metadata (via ``functools.wraps``).
    """
    # perf_counter is monotonic and high resolution; wall-clock time() can
    # jump when the system clock is adjusted. Imported locally so no
    # module-level import has to change.
    from time import perf_counter

    @wraps(f)
    def wrap(*args, **kwargs):
        started = perf_counter()
        result = f(*args, **kwargs)
        elapsed = perf_counter() - started
        print(f'func: {f.__name__} took {elapsed} seconds.')
        return result

    return wrap
from rulemall.validators import Range, Unique, Regex, Length, Category
from rulemall.rules import Rule
import json
import yaml
def rule_builder(path):
    """Build the rules described by a JSON or YAML configuration file.

    The file must contain a top-level ``rules`` sequence; each entry is
    passed to ``define_rule`` and entries for which it returns ``None`` are
    dropped.

    Args:
        path: Path to a ``.json``, ``.yml`` or ``.yaml`` file. The format is
            taken from the final extension, case-insensitively.

    Returns:
        List of the rule objects produced by ``define_rule``.

    Raises:
        ValueError: If the extension is not one of the supported formats.
    """
    import os  # local import: only needed for extension parsing

    # Use the last extension so names such as "rules.v2.json" resolve to
    # "json" instead of the second dot-separated component.
    file_format = os.path.splitext(path)[1].lstrip('.').lower()

    if file_format == 'json':
        with open(path) as f:
            configs = json.load(f)
    elif file_format in ('yml', 'yaml'):
        with open(path) as f:
            # NOTE(review): yaml.safe_load is the recommended loader when the
            # config may come from an untrusted source; FullLoader is kept
            # here to preserve existing behaviour.
            configs = yaml.load(f, Loader=yaml.FullLoader)
    else:
        raise ValueError(
            f'unsupported rule config format {file_format!r}; '
            'expected json, yml or yaml'
        )

    rules = [define_rule(config) for config in configs['rules']]
    return [rule for rule in rules if rule is not None]
def define_rule(config):
    """Create a ``Rule`` from one configuration entry.

    Args:
        config: Mapping with ``rule_type`` and, for supported types,
            ``validator_parameters`` (keyword arguments for the validator)
            and ``rule_attributes`` (keyword arguments for ``Rule``).

    Returns:
        The constructed ``Rule``, or ``None`` when ``rule_type`` is not one
        of ``Range``, ``Regex``, ``Length``, ``Unique`` or ``Category``.
    """
    kind = config['rule_type']

    # Pick the validator class first; construction is identical for all.
    if kind == 'Range':
        validator_cls = Range
    elif kind == 'Regex':
        validator_cls = Regex
    elif kind == 'Length':
        validator_cls = Length
    elif kind == 'Unique':
        validator_cls = Unique
    elif kind == 'Category':
        validator_cls = Category
    else:
        return None

    validator = validator_cls(**config['validator_parameters'])
    return Rule(validator=validator, **config['rule_attributes'])
import math
import re
from abc import ABCMeta, abstractmethod
import numpy as np
class ValidatorBaseClass(metaclass=ABCMeta):
    """Abstract interface for validators: subclasses must implement ``validate``."""

    @abstractmethod
    def validate(self, x):
        """Validate ``x``; the base implementation always raises.

        Raises:
            NotImplementedError: Always, when reached via ``super()``.
        """
        raise NotImplementedError
class Range(ValidatorBaseClass):
    """Check that values lie between a lower and an upper bound.

    Attributes:
        min: Lower bound (``-inf`` when no minimum was given).
        max: Upper bound (``+inf`` when no maximum was given).
        li: Whether the lower bound itself is accepted.
        ri: Whether the upper bound itself is accepted.
    """

    def __init__(self, minimum=None, maximum=None, left_inclusive=True, right_inclusive=False):
        # Compare against None explicitly: a bound of 0 is falsy, so
        # ``minimum or -math.inf`` silently turned it into "no bound".
        self.min = -math.inf if minimum is None else minimum
        self.max = math.inf if maximum is None else maximum
        self.li = left_inclusive
        self.ri = right_inclusive

    def validate(self, x):
        """Return whether ``x`` is inside the configured interval.

        The two comparisons are combined with ``&``, so ``x`` may be a
        scalar or any object whose comparisons yield ``&``-combinable
        results.
        """
        above_min = (self.min <= x) if self.li else (self.min < x)
        below_max = (x <= self.max) if self.ri else (x < self.max)
        return above_min & below_max
class Length(ValidatorBaseClass):
    """Check that the string length of each value lies within bounds.

    Attributes:
        min_len: Lower length bound (``0`` when none was given).
        max_len: Upper length bound (``+inf`` when none was given).
        li: Whether a length equal to ``min_len`` is accepted.
        ri: Whether a length equal to ``max_len`` is accepted.
    """

    def __init__(self, min_len=None, max_len=None, left_inclusive=True, right_inclusive=False):
        # Compare against None explicitly so an explicit ``max_len=0`` is
        # honoured instead of being replaced by infinity.
        self.min_len = 0 if min_len is None else min_len
        self.max_len = math.inf if max_len is None else max_len
        self.li = left_inclusive
        self.ri = right_inclusive

    def validate(self, x):
        """Return, per element, whether its string length is within bounds.

        Args:
            x: Object supporting ``.astype(str).str.len()`` (e.g. a pandas
                Series).
        """
        # Compute the lengths once; both bound checks use the same values.
        lengths = x.astype(str).str.len()
        long_enough = (self.min_len <= lengths) if self.li else (self.min_len < lengths)
        short_enough = (lengths <= self.max_len) if self.ri else (lengths < self.max_len)
        return long_enough & short_enough
class Category(ValidatorBaseClass):
    """Check membership of values in a fixed collection of categories."""

    def __init__(self, categories=None):
        # A missing or empty argument results in an empty list.
        self.categories = categories if categories else []

    def validate(self, x):
        """Return ``x.isin(self.categories)``.

        Args:
            x: Object exposing ``isin`` (e.g. a pandas Series).
        """
        allowed = self.categories
        return x.isin(allowed)
class Regex(ValidatorBaseClass):
    """Check whether a regular expression matches anywhere in each value."""

    def __init__(self, regex=None):
        # The pattern is compiled once at construction time.
        self.regex = re.compile(regex)

    def validate(self, x):
        """Return, per element of ``x``, whether the pattern is found in it.

        Args:
            x: Object exposing ``apply`` (e.g. a pandas Series); each
                element is searched with the compiled pattern.
        """
        pattern = self.regex

        def _found(record):
            return pattern.search(record) is not None

        return x.apply(_found)
class Missing(ValidatorBaseClass):
    """Flag values that equal a configured "missing" sentinel.

    Attributes:
        missing: The sentinel value; defaults to ``np.nan``.
    """

    def __init__(self, missing=np.nan):
        self.missing = missing

    def validate(self, x):
        """Return whether ``x`` (element-wise, if applicable) is missing.

        NaN never compares equal to anything, itself included, so
        ``x == np.nan`` is always false. When the sentinel is NaN the check
        uses ``x.isna()`` if ``x`` provides it, otherwise the
        self-inequality test ``x != x``, which is true exactly for NaN.
        Any other sentinel is compared with ``==``.
        """
        sentinel = self.missing
        if isinstance(sentinel, float) and math.isnan(sentinel):
            isna = getattr(x, 'isna', None)
            if callable(isna):
                return isna()
            return x != x
        return x == sentinel
class Unique:
    """Flag duplicated entries via the validated object's ``duplicated``."""

    def __init__(self, keep=False):
        # Forwarded unchanged as the ``keep`` argument of ``duplicated``.
        self.keep = keep

    def validate(self, s):
        """Return ``s.duplicated(keep=self.keep)``.

        Args:
            s: Object exposing ``duplicated(keep=...)`` (e.g. a pandas
                Series or DataFrame).
        """
        duplicate_mask = s.duplicated(keep=self.keep)
        return duplicate_mask
# return lambda x: x.duplicated(keep=self.keep)
# class Bool:
# def __init__(self, operator=None):
# self.operator = operator or 'or'
#
# def validate(self, x, y):
# if self.operator == 'and':
# return x & y
# elif self.operator == 'or':
# return x | y
# class Math:
# def __init__(self, operator=None):
# self.operator = operator
#
# def validate(self, x, y):
# if self.operator == 'sum':
# return x + y
# elif self.operator == 'subtract':
# return x - y
# elif self.operator == 'multiply':
# return x * y
# elif self.operator == 'divide':
# return x / y
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment