Basic core¶
This module contains all the basic functions we need in other modules of the fastai library (split from torch_core, which contains the ones requiring PyTorch). Its documentation can safely be skipped on a first read, unless you want to know what a given function does.
Global constants¶
default_cpus = min(16, num_cpus())
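num_cpus comes from the same module and reports the number of CPUs on the machine; a quick illustrative check (the printed values depend on your hardware):
# Illustrative only: actual values depend on your machine
print(num_cpus())     # e.g. 8 on an 8-core machine
print(default_cpus)   # min(16, num_cpus()), so 8 in that case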
Check functions¶
Examples for two fastai.core functions. The docstring of has_arg is shown before calling it, for reference.
has_arg(download_url,'url')
has_arg(index_row,'x')
has_arg(index_row,'a')
param,alt_param = None,5
ifnone(param,alt_param)
param,alt_param = None,[1,2,3]
ifnone(param,alt_param)
two_d_array = np.arange(12).reshape(6,2)
print( two_d_array )
print( is1d(two_d_array) )
is1d(two_d_array.flatten())
Check if x is a Collection. A Tuple or a List qualifies.
some_data = [1,2,3]
is_listy(some_data)
some_data = (1,2,3)
is_listy(some_data)
some_data = 1024
print( is_listy(some_data) )
print( is_listy( [some_data] ) )
some_data = dict([('a',1),('b',2),('c',3)])
print( some_data )
print( some_data.keys() )
print( is_listy(some_data) )
print( is_listy(some_data.keys()) )
print( is_listy(list(some_data.keys())) )
Check if x is a tuple.
print( is_tuple( [1,2,3] ) )
print( is_tuple( (1,2,3) ) )
Collection related functions¶
arange_of([5,6,7])
type(arange_of([5,6,7]))
array([1,2,3])
Note that the generator is not reset after being partially consumed, so the array call below has five fewer entries than it would if we ran it from the start of the generator.
def data_gen():
    i = 100.01
    while i<200:
        yield i
        i += 1.
ex_data_gen = data_gen()
for _ in range(5):
    print(next(ex_data_gen))
array(ex_data_gen)
ex_data_gen_int = data_gen()
array(ex_data_gen_int,dtype=int) #Cast output to int array
data_a = np.arange(15)
data_b = np.arange(15)[::-1]
mask_a = (data_a > 10)
print(data_a)
print(data_b)
print(mask_a)
arrays_split(mask_a,data_a)
np.vstack([data_a,data_b]).transpose().shape
arrays_split(mask_a,np.vstack([data_a,data_b]).transpose()) #must match on dimension 0
You can transform a Collection into an Iterable of n-sized chunks by calling chunks:
data = [0,1,2,3,4,5,6,7,8,9]
for chunk in chunks(data, 2):
    print(chunk)
for chunk in chunks(data, 3):
    print(chunk)
ex_df = pd.DataFrame.from_dict({"a":[1,1,1],"b":[2,2,2]})
print(ex_df)
df_names_to_idx('b',ex_df)
key_word_args = {"a":2,"some_list":[1,2,3],"param":'mean'}
key_word_args
(extracted_val,remainder) = extract_kwargs(['param'],key_word_args)
print( extracted_val,remainder )
idx_dict(['a','b','c'])
a is basically something you can index into, like a dataframe, an array, or a list.
data = [0,1,2,3,4,5,6,7,8,9]
index_row(data,4)
index_row(pd.Series(data),7)
data_df = pd.DataFrame([data[::-1],data]).transpose()
data_df
index_row(data_df,7)
to_match = np.arange(12)
listify('a',to_match)
listify('a',5)
listify(77.1,3)
listify( (1,2,3) )
listify((1,2,3),('a','b','c'))
Splitting is done here with random.uniform(), so you may not get the exact split percentage for small datasets.
data = np.arange(20).reshape(10,2)
data.tolist()
random_split(0.20,data.tolist())
random_split(0.20,pd.DataFrame(data))
range_of([5,4,3])
range_of(np.arange(10)[::-1])
data_df = pd.DataFrame.from_dict({"a":[1,1,1,2,2,2],"b":['f','e','f','g','g','g']})
data_df
data_df['b']
series2cat(data_df,'b')
data_df['b']
series2cat(data_df,'a')
data_df['a']
key_word_args = {'url':'http://fast.ai','dest':'./','new_var':[1,2,3],'testvalue':42}
split_kwargs_by_func(key_word_args,download_url)
to_int(3.1415)
data = [1.2,3.4,7.25]
to_int(data)
uniqueify( pd.Series(data=['a','a','b','b','f','g']) )
Metaclasses¶
show_doc(PrePostInitMeta)
A class defined with this metaclass runs __pre_init__ before __init__ and __post_init__ after it, as the assertions below check:
class _T(metaclass=PrePostInitMeta):
    def __pre_init__(self):  self.a  = 0; assert self.a==0
    def __init__(self):      self.a += 1; assert self.a==1
    def __post_init__(self): self.a += 1; assert self.a==2
t = _T()
t.a
File management and downloads¶
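download_url, used above in the has_arg and split_kwargs_by_func examples, lives in this section. A minimal hedged sketch of a typical call; the URL mirrors the one used further up this page, and the destination filename is illustrative:
# Hedged sketch: assumes download_url(url, dest), matching the 'url'/'dest'
# kwargs used elsewhere on this page; the destination name is illustrative.
download_url('http://fast.ai', './fast_ai_homepage.html')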
Multiprocessing¶
func must accept both the value and index of each arr element.
def my_func(value, index):
    print("Index: {}, Value: {}".format(index, value))
my_array = [i*2 for i in range(5)]
parallel(my_func, my_array, max_workers=3)
Data block API¶
All items used in fastai should subclass this. Must have a data field that will be used when collating in mini-batches.
The default behavior is to set the string representation of this object as the title of ax.
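As a hedged sketch (the class name is illustrative and the exact base-class contract is assumed from the description above), a minimal ItemBase subclass could look like:
class MyItem(ItemBase):
    "Illustrative sketch of an ItemBase subclass; `MyItem` is not a fastai class."
    def __init__(self, obj):
        self.obj  = obj
        self.data = obj             # `data` is what gets collated in mini-batches
    def __str__(self): return str(self.obj)  # used by default as the title of `ax`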
Create a MultiCategory with an obj that is a collection of labels. data corresponds to the one-hot encoded labels and raw is a list of the associated strings.
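A hedged sketch of building one by hand, assuming the constructor takes data, obj and raw in that order (the label vocabulary is illustrative):
# Hedged sketch: assumes MultiCategory(data, obj, raw); the one-hot vector is
# over an illustrative vocabulary ['bird', 'cat', 'dog'].
labels = ['cat', 'dog']                    # obj: a collection of labels
data = np.array([0., 1., 1.])              # one-hot encoded labels
mc = MultiCategory(data, labels, labels)   # raw: the associated strings
str(mc)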
Others¶
camel2snake('DeviceDataLoader')
In linear scales each element is equidistant from its neighbors:
# from 1 to 10 in 5 steps
t = np.linspace(1, 10, 5)
t
for i in range(len(t) - 1):
    print(t[i+1] - t[i])
In logarithmic scales, each element is a multiple of the previous entry:
t = even_mults(1, 10, 5)
t
# notice how each number is a multiple of its predecessor
for i in range(len(t) - 1):
    print(t[i+1] / t[i])
func_args(download_url)
Additionally, func_args can be used with functions that do not belong to the fastai library:
func_args(np.linspace)
Return x.
# object is returned as-is
noop([1,2,3])
One-hot encoding is a standard machine learning technique. Assume we are dealing with a 10-class classification problem and we are supplied a list of labels:
y = [1, 4, 4, 5, 7, 9, 2, 4, 0]
jekyll_note("""y is zero-indexed, therefore its first element (1) belongs to class 2, its second element (4) to class 5 and so on.""")
len(y)
y can equivalently be expressed as a matrix of 9 rows and 10 columns, where each row represents one element of the original y.
for label in y:
    print(one_hot(label, 10))
# select 3 elements from a list
some_data = show_some([10, 20, 30, 40, 50], 3)
some_data
type(some_data)
# the separator can be changed
some_data = show_some([10, 20, 30, 40, 50], 3, sep = '---')
some_data
some_data[:-3]
show_some can take as input any class with __len__ and __getitem__:
class Any(object):
    def __init__(self, data):
        self.data = data
    def __len__(self):
        return len(self.data)
    def __getitem__(self, i):
        return self.data[i]
some_other_data = Any('nice')
show_some(some_other_data, 2)