LIME paper: Recurrent Neural Network for Solubility Prediction
Import packages and set up RNN
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import tensorflow as tf
import selfies as sf
import exmol
from dataclasses import dataclass
from rdkit.Chem.Draw import rdDepictor, MolsToGridImage
from rdkit.Chem import MolFromSmiles
import random
rdDepictor.SetPreferCoordGen(True)
import matplotlib.font_manager as font_manager
import urllib.request
urllib.request.urlretrieve(
"https://github.com/google/fonts/raw/main/ofl/ibmplexmono/IBMPlexMono-Regular.ttf",
"IBMPlexMono-Regular.ttf",
)
fe = font_manager.FontEntry(fname="IBMPlexMono-Regular.ttf", name="plexmono")
font_manager.fontManager.ttflist.append(fe)
plt.rcParams.update(
{
"axes.facecolor": "#f5f4e9",
"grid.color": "#AAAAAA",
"axes.edgecolor": "#333333",
"figure.facecolor": "#FFFFFF",
"axes.grid": False,
"axes.prop_cycle": plt.cycler("color", plt.cm.Dark2.colors),
"font.family": fe.name,
"figure.figsize": (3.5, 3.5 / 1.2),
"ytick.left": True,
"xtick.bottom": True,
}
)
mpl.rcParams["font.size"] = 12
soldata = pd.read_csv(
"https://github.com/whitead/dmol-book/raw/main/data/curated-solubility-dataset.csv"
)
features_start_at = list(soldata.columns).index("MolWt")
np.random.seed(0)
random.seed(0)
# shuffle the rows and subsample to 1% of the data
soldata = soldata.sample(frac=0.01, random_state=0).reset_index(drop=True)
soldata.head()
  | ID | Name | InChI | InChIKey | SMILES | Solubility | SD | Ocurrences | Group | MolWt | ... | NumRotatableBonds | NumValenceElectrons | NumAromaticRings | NumSaturatedRings | NumAliphaticRings | RingCount | TPSA | LabuteASA | BalabanJ | BertzCT
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---
0 | B-4206 | diuron | InChI=1S/C9H10Cl2N2O/c1-13(2)9(14)12-6-3-4-7(1... | XMTQQYYKAHVGBJ-UHFFFAOYSA-N | CN(C)C(=O)Nc1ccc(Cl)c(Cl)c1 | -3.744300 | 1.227164 | 5 | G4 | 233.098 | ... | 1.0 | 76.0 | 1.0 | 0.0 | 0.0 | 1.0 | 32.34 | 92.603980 | 2.781208 | 352.665233 |
1 | F-988 | 7-(3-amino-3-methylazetidin-1-yl)-8-chloro-1-c... | InChI=1S/C17H17ClFN3O3/c1-17(20)6-21(7-17)14-1... | DUNZFXZSFJLIKR-UHFFFAOYSA-N | CC1(N)CN(C2=C(Cl)C3=C(C=C2F)C(=O)C(C(=O)O)=CN3... | -5.330000 | 0.000000 | 1 | G1 | 365.792 | ... | 3.0 | 132.0 | 2.0 | 2.0 | 2.0 | 4.0 | 88.56 | 147.136366 | 2.001398 | 973.487509 |
2 | C-1996 | 4-acetoxybiphenyl; 4-biphenylyl acetate | InChI=1S/C14H12O2/c1-11(15)16-14-9-7-13(8-10-1... | MISFQCBPASYYGV-UHFFFAOYSA-N | CC(=O)OC1=CC=C(C=C1)C2=CC=CC=C2 | -4.400000 | 0.000000 | 1 | G1 | 212.248 | ... | 2.0 | 80.0 | 2.0 | 0.0 | 0.0 | 2.0 | 26.30 | 94.493449 | 2.228677 | 471.848345 |
3 | A-3055 | methane dimolybdenum | InChI=1S/CH4.2Mo/h1H4;; | JAGQSESDQXCFCH-UHFFFAOYSA-N | C.[Mo].[Mo] | -3.420275 | 0.409223 | 2 | G3 | 207.923 | ... | 0.0 | 20.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.00 | 49.515427 | -0.000000 | 2.754888 |
4 | A-2575 | ethyl 4-[[(methylphenylamino)methylene]amino]b... | InChI=1S/C17H18N2O2/c1-3-21-17(20)14-9-11-15(1... | GNGYPJUKIKDJQT-UHFFFAOYSA-N | CCOC(=O)c1ccc(cc1)N=CN(C)c2ccccc2 | -5.450777 | 0.000000 | 1 | G1 | 282.343 | ... | 5.0 | 108.0 | 2.0 | 0.0 | 0.0 | 2.0 | 41.90 | 124.243431 | 2.028889 | 606.447052 |
5 rows × 26 columns
from rdkit.Chem import MolToSmiles
def _randomize_smiles(mol, isomericSmiles=True):
return MolToSmiles(
mol,
canonical=False,
doRandom=True,
isomericSmiles=isomericSmiles,
kekuleSmiles=random.random() < 0.5,
)
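As a quick illustration of the randomizer (a sketch; the outputs vary from run to run because both the atom ordering and the kekulization are random):
# hypothetical demo: three random SMILES spellings of toluene
demo_mol = MolFromSmiles("Cc1ccccc1")
for _ in range(3):
    print(_randomize_smiles(demo_mol))  # e.g. c1ccc(C)cc1, CC1=CC=CC=C1, ...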
smiles = list(soldata["SMILES"])
solubilities = list(soldata["Solubility"])
aug_data = 10
def largest_mol(smiles):
ss = smiles.split(".")
ss.sort(key=lambda a: len(a))
return ss[-1]
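largest_mol keeps only the longest fragment of a multi-fragment SMILES, string length being a cheap proxy for molecule size. For example:
# drop the sodium counterion from sodium acetate
print(largest_mol("[Na+].CC(=O)[O-]"))  # CC(=O)[O-]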
aug_smiles = []
aug_solubilities = []
for sml, sol in zip(smiles, solubilities):
sml = largest_mol(sml)
if len(sml) <= 4:
continue # ion or metal
new_smls = []
new_smls.append(sml)
aug_solubilities.append(sol)
for _ in range(aug_data):
try:
new_sml = _randomize_smiles(MolFromSmiles(sml))
if new_sml not in new_smls:
new_smls.append(new_sml)
aug_solubilities.append(sol)
except Exception:  # skip SMILES that RDKit fails to parse or randomize
continue
aug_smiles.extend(new_smls)
aug_df_AqSolDB = pd.DataFrame(
data={"SMILES": aug_smiles, "Solubility": aug_solubilities}
)
print(f"The dataset was augmented from {len(soldata)} to {len(aug_df_AqSolDB)}.")
The dataset was augmented from 100 to 990.
selfies_list = []
for s in aug_df_AqSolDB.SMILES:
try:
selfies_list.append(sf.encoder(exmol.sanitize_smiles(s)[1]))
except sf.EncoderError:
selfies_list.append(None)
len(selfies_list)
990
basic = set(exmol.get_basic_alphabet())
data_vocab = set(
sf.get_alphabet_from_selfies([s for s in selfies_list if s is not None])
)
vocab = ["[nop]"]
vocab.extend(list(data_vocab.union(basic)))
vocab_stoi = {o: i for o, i in zip(vocab, range(len(vocab)))}
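By construction, the padding token [nop] sits at index 0, which is what lets the embedding layer later treat index 0 as a mask (a quick check):
print(vocab_stoi["[nop]"])  # 0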
def selfies2ints(s):
result = []
for token in sf.split_selfies(s):
if token in vocab_stoi:
result.append(vocab_stoi[token])
else:
result.append(np.nan)  # out-of-vocabulary token becomes a NaN sentinel
return result
def ints2selfies(v):
return "".join([vocab[i] for i in v])
# test them out
s = selfies_list[0]
print("selfies:", s)
v = selfies2ints(s)
print("selfies2ints:", v)
so = ints2selfies(v)
selfies: [C][N][Branch1][C][C][C][=Branch1][C][=O][N][C][=C][C][=C][Branch1][C][Cl][C][Branch1][C][Cl][=C][Ring1][Branch2]
selfies2ints: [1, 28, 17, 1, 1, 1, 29, 1, 5, 28, 1, 25, 1, 25, 17, 1, 47, 1, 17, 1, 47, 25, 7, 31]
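Since ints2selfies simply rejoins the tokens, the round trip is lossless for in-vocabulary strings; a minimal check (decoding back to SMILES with the selfies package):
assert so == s  # round trip reproduces the original SELFIES
print("decoded SMILES:", sf.decoder(so))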
# dataclass holding model and training hyperparameters
@dataclass
class Config:
vocab_size: int
example_number: int
batch_size: int
buffer_size: int
embedding_dim: int
rnn_units: int
hidden_dim: int
drop_rate: float
config = Config(
vocab_size=len(vocab),
example_number=len(selfies_list),
batch_size=128,
buffer_size=10000,
embedding_dim=64,
hidden_dim=32,
rnn_units=64,
drop_rate=0.20,
)
# now get sequences
encoded = [selfies2ints(s) for s in selfies_list if s is not None]
# keep only labels whose SELFIES encoding succeeded (entry is not None)
dsolubilities = aug_df_AqSolDB.Solubility.values[[s is not None for s in selfies_list]]
padded_seqs = tf.keras.preprocessing.sequence.pad_sequences(encoded, padding="post")
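Note that pad_sequences right-pads with 0, the same index as [nop], so the padding and the embedding mask coincide (a tiny demo):
print(tf.keras.preprocessing.sequence.pad_sequences([[1, 2], [3]], padding="post"))
# [[1 2]
#  [3 0]]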
# The data were already shuffled when we sampled the dataframe, so we can split sequentially
N = len(padded_seqs)
split = int(0.1 * N)
# Now build dataset
test_data = tf.data.Dataset.from_tensor_slices(
(padded_seqs[:split], dsolubilities[:split])
).batch(config.batch_size)
nontest = tf.data.Dataset.from_tensor_slices(
(
padded_seqs[split:],
dsolubilities[split:],
)
)
val_data, train_data = nontest.take(split).batch(config.batch_size), nontest.skip(
split
).shuffle(config.buffer_size).batch(config.batch_size).prefetch(
tf.data.experimental.AUTOTUNE
)
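A quick sanity check on the resulting splits (illustrative; the counts follow directly from the 10% split above):
# test and validation each get `split` molecules; the rest are training
print(f"total: {N}, test: {split}, val: {split}, train: {N - 2 * split}")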
model = tf.keras.Sequential()
# make embedding and indicate that 0 should be treated as padding mask
model.add(
tf.keras.layers.Embedding(
input_dim=config.vocab_size, output_dim=config.embedding_dim, mask_zero=True
)
)
model.add(tf.keras.layers.Dropout(config.drop_rate))
# RNN layer
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(config.rnn_units)))
model.add(tf.keras.layers.Dropout(config.drop_rate))
# a dense hidden layer
model.add(tf.keras.layers.Dense(config.hidden_dim, activation="relu"))
model.add(tf.keras.layers.Dropout(config.drop_rate))
# regression, so no activation
model.add(tf.keras.layers.Dense(1))
model.summary()
Model: "sequential"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type)                    ┃ Output Shape           ┃       Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ embedding (Embedding)           │ ?                      │   0 (unbuilt) │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout (Dropout)               │ ?                      │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ bidirectional (Bidirectional)   │ ?                      │   0 (unbuilt) │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout_1 (Dropout)             │ ?                      │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense (Dense)                   │ ?                      │   0 (unbuilt) │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout_2 (Dropout)             │ ?                      │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_1 (Dense)                 │ ?                      │   0 (unbuilt) │
└─────────────────────────────────┴────────────────────────┴───────────────┘
Total params: 0 (0.00 B)
Trainable params: 0 (0.00 B)
Non-trainable params: 0 (0.00 B)
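The parameter counts read "0 (unbuilt)" because Keras defers creating weights until it knows the input shape. A minimal sketch to materialize them (assuming the padded sequence length from above):
model.build(input_shape=(None, padded_seqs.shape[1]))  # batch dimension left as None
model.summary()  # parameter counts are now populated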
model.compile(tf.optimizers.Adam(1e-3), loss="mean_squared_error")
# verbose=0 silences output, to get progress bar set verbose=1
result = model.fit(train_data, validation_data=val_data, epochs=50)
Epoch 1/50
7/7 ━━━━━━━━━━━━━━━━━━━━ 4s 135ms/step - loss: 13.3089 - val_loss: 2.5035
Epoch 2/50
7/7 ━━━━━━━━━━━━━━━━━━━━ 1s 89ms/step - loss: 6.1880 - val_loss: 2.5428
Epoch 3/50
7/7 ━━━━━━━━━━━━━━━━━━━━ 1s 88ms/step - loss: 4.4864 - val_loss: 0.7035
...
Epoch 49/50
7/7 ━━━━━━━━━━━━━━━━━━━━ 1s 90ms/step - loss: 0.9589 - val_loss: 1.0225
Epoch 50/50
7/7 ━━━━━━━━━━━━━━━━━━━━ 1s 89ms/step - loss: 1.1009 - val_loss: 1.7295
model.save("solubility-rnn-accurate.keras")
# model = tf.keras.models.load_model('solubility-rnn-accurate.keras')
plt.figure(figsize=(5, 3.5))
plt.plot(result.history["loss"], label="training")
plt.plot(result.history["val_loss"], label="validation")
plt.legend()
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.savefig("rnn-loss.png", bbox_inches="tight", dpi=300)
plt.show()

yhat = []
test_y = []
for x, y in test_data:
yhat.extend(model(x).numpy().flatten())
test_y.extend(y.numpy().flatten())
yhat = np.array(yhat)
test_y = np.array(test_y)
# plot test data
plt.figure(figsize=(5, 3.5))
plt.plot(test_y, test_y, ":")
plt.plot(test_y, yhat, ".")
plt.text(
max(test_y) - 6,
min(test_y) + 1,
f"correlation = {np.corrcoef(test_y, yhat)[0,1]:.3f}",
)
plt.text(
max(test_y) - 6, min(test_y), f"loss = {np.sqrt(np.mean((test_y - yhat)**2)):.3f}"
)
plt.xlabel(r"$y$")
plt.ylabel(r"$\hat{y}$")
plt.title("Testing Data")
plt.savefig("rnn-fit.png", dpi=300, bbox_inches="tight")
plt.show()

LIME explanations
In the following example, we find out which descriptors influence the solubility of a molecule. For example, say we have a molecule with LogS = 1.5. We create a perturbed chemical space around that molecule using the STONED method and then use LIME to find out which descriptors affect solubility predictions for that molecule.
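Under the hood, the LIME fit is a weighted linear surrogate to the RNN over the sampled space. If $X$ holds the molecular descriptors, $\hat{y}$ the RNN predictions, and $W = \mathrm{diag}(w)$ the per-molecule chemical-similarity weights, the attributions $\beta$ solve the weighted least squares problem

$\beta = (X^T W X)^{-1} X^T W \hat{y}$

so each component of $\beta$ measures how strongly a descriptor moves the prediction among molecules similar to the one being explained. (This is the standard LIME surrogate; the similarity kernel used in this notebook appears explicitly in the fit-quality code below.)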
Wrapper function for the RNN, to use in STONED
# Predictor function is used as input to exmol.sample_space
def predictor_function(smile_list, selfies):
encoded = [selfies2ints(s) for s in selfies]
# sequences with out-of-vocabulary tokens contain NaN; give those a NaN label
valid = [1.0 if sum(e) > 0 else np.nan for e in encoded]
encoded = [np.nan_to_num(e, nan=0) for e in encoded]
padded_seqs = tf.keras.preprocessing.sequence.pad_sequences(encoded, padding="post")
labels = np.reshape(model(padded_seqs, training=False), (-1))
return labels * valid
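A quick illustrative call (a sketch; exmol supplies both SMILES and SELFIES lists, and this wrapper only uses the SELFIES):
# hypothetical demo molecules: ethanol and benzene
demo_selfies = [sf.encoder("CCO"), sf.encoder("c1ccccc1")]
print(predictor_function(None, demo_selfies))  # two predicted log-solubilities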
Descriptor explanations
# Make sure SMILES doesn't contain multiple fragments
smi = "CCCCC(=O)N(CC1=CC=C(C=C1)C2=C(C=CC=C2)C3=NN=N[NH]3)C(C(C)C)C(O)=O" # mol1 - not soluble
# smi = "CC(CC(=O)NC1=CC=CC=C1)C(=O)O" #mol2 - highly soluble
af = exmol.get_basic_alphabet()
stoned_kwargs = {
"num_samples": 5000,
"alphabet": af,
"max_mutations": 2,
}
space = exmol.sample_space(
smi, predictor_function, stoned_kwargs=stoned_kwargs, quiet=True
)
print(len(space))
4166
from IPython.display import display, SVG
desc_type = ["Classic", "ecfp", "maccs"]
for d in desc_type:
beta = exmol.lime_explain(space, descriptor_type=d)
if d == "ecfp":
display(
SVG(
exmol.plot_descriptors(
space, output_file=f"{d}_mol2.svg", return_svg=True
)
)
)
plt.close()
else:
exmol.plot_descriptors(space, output_file=f"{d}_mol2.svg")
SMARTS annotations for MACCS descriptors were created using SMARTSviewer (smartsview.zbh.uni-hamburg.de, Copyright: ZBH, Center for Bioinformatics Hamburg) developed by K. Schomburg et. al. (J. Chem. Inf. Model. 2010, 50, 9, 1529–1535)

Text explanations
exmol.lime_explain(space, "ecfp")
s1_ecfp = exmol.text_explain(space, "ecfp")
explanation = exmol.text_explain_generate(s1_ecfp, "aqueous solubility")
print(explanation)
The presence of a hetero N nonbasic/heteroaromatic/aromatic group in the molecule significantly enhances its aqueous solubility. This structural feature likely contributes to solubility through increased polarity and potential hydrogen bonding interactions with water molecules. The hetero N atom, being part of an aromatic system, can engage in π-π interactions and may also stabilize the solute in the aqueous phase. If this hetero N group were absent, the molecule would likely exhibit reduced aqueous solubility due to a decrease in these favorable interactions. Thus, the hetero N nonbasic/heteroaromatic/aromatic group is crucial for maintaining and enhancing the molecule's solubility in water.
Similarity map
beta = exmol.lime_explain(space, "ecfp")
svg = exmol.plot_utils.similarity_map_using_tstats(space[0], return_svg=True)
display(SVG(svg))
# Write figure to file
with open("ecfp_similarity_map_mol2.svg", "w") as f:
f.write(svg)
# Inspect space
MolsToGridImage(
[MolFromSmiles(m.smiles) for m in space],
legends=[f"yhat = {m.yhat:.3}" for m in space],
molsPerRow=10,
maxMols=100,
)

How’s the fit?
fkw = {"figsize": (6, 4)}
font = {"family": "normal", "weight": "normal", "size": 16}
fig = plt.figure(figsize=(10, 5))
mpl.rc("axes", titlesize=12)
mpl.rc("font", size=16)
ax_dict = fig.subplot_mosaic("AABBB")
# Plot space by fit
svg = exmol.plot_utils.plot_space_by_fit(
space,
[space[0]],
figure_kwargs=fkw,
mol_size=(200, 200),
offset=1,
ax=ax_dict["B"],
beta=beta,
)
# Compute y_wls: similarity kernel weights emphasize molecules close to the base molecule
w = np.array([1 / (1 + (1 / (e.similarity + 0.000001) - 1) ** 5) for e in space])
non_zero = w > 10 ** (-6)
w = w[non_zero]
N = w.shape[0]
ys = np.array([e.yhat for e in space])[non_zero].reshape(N).astype(float)
x_mat = np.array([list(e.descriptors.descriptors) for e in space])[non_zero].reshape(
N, -1
)
y_wls = x_mat @ beta
y_wls += np.mean(ys)
lower = np.min(ys)
higher = np.max(ys)
# set transparency using w
norm = plt.Normalize(min(w), max(w))
cmap = plt.cm.Oranges(w)
cmap[:, -1] = w
def weighted_mean(x, w):
return np.sum(x * w) / np.sum(w)
def weighted_cov(x, y, w):
return np.sum(w * (x - weighted_mean(x, w)) * (y - weighted_mean(y, w))) / np.sum(w)
def weighted_correlation(x, y, w):
return weighted_cov(x, y, w) / np.sqrt(
weighted_cov(x, x, w) * weighted_cov(y, y, w)
)
corr = weighted_correlation(ys, y_wls, w)
ax_dict["A"].plot(
np.linspace(lower, higher, 100), np.linspace(lower, higher, 100), "--", linewidth=2
)
sc = ax_dict["A"].scatter(ys, y_wls, s=50, marker=".", color=cmap)
ax_dict["A"].text(max(ys) - 3, min(ys) + 1, f"weighted \ncorrelation = {corr:.3f}")
ax_dict["A"].set_xlabel(r"$\hat{y}$")
ax_dict["A"].set_ylabel(r"$g$")
ax_dict["A"].set_title("Weighted Least Squares Fit")
ax_dict["A"].set_xlim(lower, higher)
ax_dict["A"].set_ylim(lower, higher)
ax_dict["A"].set_aspect(1.0 / ax_dict["A"].get_data_ratio(), adjustable="box")
sm = plt.cm.ScalarMappable(cmap=plt.cm.Oranges, norm=norm)
cbar = plt.colorbar(sm, orientation="horizontal", pad=0.15, ax=ax_dict["A"])
cbar.set_label("Chemical similarity")
plt.tight_layout()
plt.savefig("weighted_fit.svg", dpi=300, bbox_inches="tight", transparent=False)

Robustness to incomplete sampling
We first sample a reference chemical space, and then subsample smaller chemical spaces from this reference. Rank correlation is computed between important descriptors for the smaller subspaces and the reference space.
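For intuition, Spearman's rank correlation compares orderings rather than raw values; a toy example (illustrative only):
from scipy.stats import spearmanr
# identical rankings except the last two items are swapped
print(spearmanr([1, 2, 3, 4, 5], [1, 2, 3, 5, 4]).correlation)  # 0.9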
# Sample a big space
stoned_kwargs = {
"num_samples": 5000,
"alphabet": exmol.get_basic_alphabet(),
"max_mutations": 2,
}
space = exmol.sample_space(
smi, predictor_function, stoned_kwargs=stoned_kwargs, quiet=True
)
len(space)
4202
# get descriptor attributions
exmol.lime_explain(space, "MACCS", return_beta=False)
# Assign feature ids for rank comparison
features = {
a: b
for a, b in zip(
space[0].descriptors.descriptor_names,
np.arange(len(space[0].descriptors.descriptors)),
)
}
# Get set of ranks for the reference space
baseline_imp = {
a: b
for a, b in zip(space[0].descriptors.descriptor_names, space[0].descriptors.tstats)
if not np.isnan(b)
}
baseline_imp = dict(
sorted(baseline_imp.items(), key=lambda item: abs(item[1]), reverse=True)
)
baseline_set = [features[x] for x in baseline_imp.keys()]
# Subsample the space, recompute LIME importances, and compare descriptor ranks
from scipy.stats import spearmanr
plt.figure(figsize=(4, 3))
N = len(space)
size = np.arange(500, N, 1000)
rank_corr = {N: 1}
for i, f in enumerate(size):
# subsample space
rank_corr[f] = []
for _ in range(10):
# subsample space of size f
idx = np.random.choice(np.arange(N), size=f, replace=False)
subspace = [space[i] for i in idx]
# get desc attributions
ss_beta = exmol.lime_explain(subspace, descriptor_type="MACCS")
ss_imp = {
a: b
for a, b in zip(
subspace[0].descriptors.descriptor_names, subspace[0].descriptors.tstats
)
if not np.isnan(b)
}
ss_imp = dict(
sorted(ss_imp.items(), key=lambda item: abs(item[1]), reverse=True)
)
ss_set = [features[x] for x in ss_imp.keys()]
# Get ranks for subsampled space and compare with reference
ranks = {a: [b] for a, b in zip(baseline_set[:5], np.arange(1, 6))}
for j, s in enumerate(ss_set):
if s in ranks:
ranks[s].append(j + 1)
# compute rank correlation
r = spearmanr(np.arange(1, 6), [ranks[x][1] for x in ranks])
rank_corr[f].append(r.correlation)
plt.scatter(f, np.mean(rank_corr[f]), color="#13254a", marker="o")
plt.scatter(N, 1.0, color="red", marker="o")
plt.axvline(x=N, linestyle=":", color="red")
plt.xlabel("Size of chemical space")
plt.ylabel("Rank correlation")
plt.tight_layout()
plt.savefig("rank correlation.svg", dpi=300, bbox_inches="tight")

Effect of mutation number, alphabet, and size of chemical space
# Mutation
desc_type = ["Classic"]
muts = [1, 2, 3]
for i in muts:
stoned_kwargs = {
"num_samples": 2500,
"alphabet": exmol.get_basic_alphabet(),
"min_mutations": i,
"max_mutations": i,
}
space = exmol.sample_space(
smi, predictor_function, stoned_kwargs=stoned_kwargs, quiet=True
)
for d in desc_type:
exmol.lime_explain(space, descriptor_type=d)
exmol.plot_descriptors(space, title=f"Mutations={i}")



# Alphabet
basic = exmol.get_basic_alphabet()
train = sf.get_alphabet_from_selfies([s for s in selfies_list if s is not None])
wide = sf.get_semantic_robust_alphabet()
desc_type = ["MACCS"]
alphs = {"Basic": basic, "Training Data": train, "SELFIES": wide}
for a in alphs:
stoned_kwargs = {"num_samples": 2500, "alphabet": alphs[a], "max_mutations": 2}
space = exmol.sample_space(
smi, predictor_function, stoned_kwargs=stoned_kwargs, quiet=True
)
for d in desc_type:
exmol.lime_explain(space, descriptor_type=d)
exmol.plot_descriptors(space, title=f"Alphabet: {a}")
SMARTS annotations for MACCS descriptors were created using SMARTSviewer (smartsview.zbh.uni-hamburg.de, Copyright: ZBH, Center for Bioinformatics Hamburg) developed by K. Schomburg et. al. (J. Chem. Inf. Model. 2010, 50, 9, 1529–1535)



# Size of space
desc_type = ["MACCS"]
space_size = [1500, 2000, 2500]
for s in space_size:
stoned_kwargs = {
"num_samples": s,
"alphabet": exmol.get_basic_alphabet(),
"max_mutations": 2,
}
space = exmol.sample_space(
smi, predictor_function, stoned_kwargs=stoned_kwargs, quiet=True
)
for d in desc_type:
exmol.lime_explain(space, descriptor_type=d)
exmol.plot_descriptors(
space,
title=f"Chemical space size={s}",
)
SMARTS annotations for MACCS descriptors were created using SMARTSviewer (smartsview.zbh.uni-hamburg.de, Copyright: ZBH, Center for Bioinformatics Hamburg) developed by K. Schomburg et. al. (J. Chem. Inf. Model. 2010, 50, 9, 1529–1535)


