Also, I renew my call for some conlang nerds to come up with products for fantasy races, where one could get a big, say, dwarvish dictionary, complete with names and surnames and, as
@MNblockhead says, with notations on which names are more common than others. What's the dwarvish version of "John Smith," etc.?
Okay, I whipped together a variant of some code I've written before. It takes a word (name) list and makes new words/names based on three-letter frequencies from the original list, counting the start and the end of a word/name. Using some US census data I found online somewhere, I got the following male names from it: Kellyes, Hirontha, Mardolly, Chita, Zarl, Juntabalil, Lenick, Gleshi, Gaddon, Rodiel, Papirver, Kerani, Keriel, Liosz, Kitran, Eranne, Orajohn, Clella.
You could then use the same frequency data to give a "popularity" score to each name. Here's a sample: [(5560, 'Karick'), (5283, 'Alongs'), (4839, 'Tolani'), (4700, 'Herion'), (4695, 'Sirodi'), (4335, 'Porion'), (4204, 'Chels'), (3909, 'Anthael'), (3788, 'Baitn'), (3544, 'Michama'), (3426, 'Saftaro'), (3410, 'Slomana'), (3301, 'Haldena'), (3256, 'Blainte'), (3105, 'Gerleem'), (2357, 'Bhan'), (1117, 'Toicelize'), (721, 'Tyqueredaro')]
Here are some female names for comparison: Kionnasha, Darenee, Khunia, Lalera, Ernet, Tifaela, Gersha, Yerie, Reaun, Kethi, Khylis, Wynniya, Jadahy, Sharal, Raili, Cheryet, Rosara, Caria.
Now, these names are from the US census, so they will be biased toward English names, but with various other ethnicities mixed in. But if you could get name lists based on a particular ethnicity, you could generate new names based on that list, and assign those names to a particular fantasy race. Or you could do it with names from a scientific dictionary, or a list of dinosaur names, or a list of names with a 'd' in them.
Code:
"""
nomenclator.py
Yet another fantasy name generator.
"""
import collections
import random
class Nomenclator:
    """
    Generate new names that imitate the letter patterns of a word list.

    Every source word is prefixed with '^' (a start-of-word marker) and
    scanned for letter triples: each pair of adjacent characters is mapped
    to the characters seen to follow it. Triples that finish a word are
    additionally tallied on their own so a name can be closed with a
    plausible ending.

    Attributes:
        check: The stripped, non-blank source words. (set of str)
        starts: Choices table of first letters. (dict)
        pairs: Maps a two-character context to a choices table of the
            letters that follow it anywhere in a word. (dict of dict)
        ends: Maps a two-character context to a choices table of the
            letters that follow it as the last letter of a word.
            (dict of dict)
        lengths: Choices table of word lengths. (dict)
        mean_length: The average length of the source words. (float)

    A "choices table" is a dict with the keys 'population' and 'weights'
    holding parallel tuples, ready to be unpacked into random.choices().
    """

    def __init__(self, word_list):
        """
        Tally the letter statistics of the source words.

        Parameters:
            word_list: The words/names to imitate. Surrounding whitespace
                is stripped and blank entries are ignored. Any iterable
                works; it is consumed exactly once. (iterable of str)

        Raises:
            ValueError: If word_list holds no non-blank word.
        """
        # Strip once, up front, so the novelty check and the frequency
        # tables agree on spelling. Blank words have no first letter to
        # count, so they are dropped rather than allowed to crash.
        words = [word.strip() for word in word_list]
        words = [word for word in words if word]
        if not words:
            raise ValueError('word_list must contain at least one non-blank word')
        self.check = set(words)
        # Parse the parts of the words.
        starts = collections.Counter()
        pairs = collections.defaultdict(collections.Counter)
        ends = collections.defaultdict(collections.Counter)
        lengths = collections.Counter()
        total_length = 0
        for word in words:
            lengths[len(word)] += 1
            total_length += len(word)
            starts[word[0]] += 1
            # '^' marks the start, so the first real letter has a context.
            marked = f'^{word}'
            for ndx in range(len(marked) - 2):
                pairs[marked[ndx:ndx + 2]][marked[ndx + 2]] += 1
            # The closing triple is counted here as well as in pairs.
            ends[marked[-3:-1]][marked[-1]] += 1
        self.mean_length = total_length / len(words)
        # Convert the data to a form usable by choices. Plain dicts (not
        # defaultdicts) are stored so an unseen context raises KeyError.
        self.starts = self._as_choices(starts)
        self.pairs = {pair: self._as_choices(tally) for pair, tally in pairs.items()}
        self.ends = {almost: self._as_choices(tally) for almost, tally in ends.items()}
        self.lengths = self._as_choices(lengths)

    @staticmethod
    def _as_choices(counter):
        """Turn a Counter into keyword arguments for random.choices. (dict)"""
        return {'population': tuple(counter.keys()),
                'weights': tuple(counter.values())}

    @staticmethod
    def _weight_of(table, item):
        """
        Look up the weight recorded for an item in a choices table. (int)

        Raises ValueError if the item is not in the table's population.
        """
        return table['weights'][table['population'].index(item)]

    def _draft(self):
        """
        Make one attempt at building a name, without the '^' marker. (str)

        Raises KeyError when the walk reaches a two-character context that
        has no recorded continuation (or no recorded ending).
        """
        # Get a random start and length.
        draft = '^' + random.choices(**self.starts)[0]
        length = random.choices(**self.lengths)[0]
        # Add letters based on the last two letters of the name. The marker
        # counts toward len(draft), so this stops one letter short of length.
        while len(draft) < length:
            draft += random.choices(**self.pairs[draft[-2:]])[0]
        # Add a final character.
        # NOTE: because an ending is always appended, a drawn length of 1
        # still produces a two-letter name.
        draft += random.choices(**self.ends[draft[-2:]])[0]
        return draft[1:]

    def name(self):
        """
        Generate a new name. (str)

        Attempts are repeated until one both completes and is absent from
        the source words, so this does not return if no such name can be
        built from the source list.
        """
        while True:
            try:
                candidate = self._draft()
            except KeyError:
                # Dead end: no known continuation for the last two letters.
                continue
            # Make sure that the name is new.
            if candidate not in self.check:
                return candidate

    def names(self, n=18, popularity=False):
        """
        Generate a list of unique names. (list)

        Without popularity the result is a list of str in generation order.
        With popularity it is a list of (score, name) tuples sorted from
        the highest score to the lowest.

        Generation repeats until n distinct names exist, so this does not
        return if the source list cannot yield that many.

        Parameters:
            n: How many names to generate. (int)
            popularity: Whether to score and rank the names. (bool)
        """
        name_list = []
        seen = set()  # constant-time duplicate check; the list keeps order
        while len(name_list) < n:
            new_name = self.name()
            if new_name not in seen:
                seen.add(new_name)
                name_list.append(new_name)
        if popularity:
            name_list = sorted(((self.popularity(name), name) for name in name_list),
                               reverse=True)
        return name_list

    def popularity(self, name):
        """
        Get a normalized popularity score for a given name. (int)

        The weights of the first letter, of each interior triple, and of
        the closing triple are summed, averaged over the letters of the
        name, scaled by the mean source word length, and offset by the
        weight of the name's length.

        Parameters:
            name: A name that could be generated by this nomenclator. (str)

        Raises:
            ValueError: If the length, first letter, or a following letter
                was never recorded for its context.
            KeyError: If a two-character context was never recorded.
            IndexError: If name is empty.
        """
        len_score = self._weight_of(self.lengths, len(name))
        ltr_score = self._weight_of(self.starts, name[0])
        marked = f'^{name}'
        # Interior triples only; the closing triple is scored from ends.
        for ndx in range(len(marked) - 3):
            context = marked[ndx:ndx + 2]
            ltr_score += self._weight_of(self.pairs[context], marked[ndx + 2])
        almost = marked[-3:-1]
        ltr_score += self._weight_of(self.ends[almost], marked[-1])
        # len(marked) - 1 is the number of letters in the unmarked name.
        return int((ltr_score / (len(marked) - 1)) * self.mean_length + len_score)
if __name__ == '__main__':
    # Demo: build a generator from the census names tagged 'F', then print
    # a batch of plain names and a batch ranked by popularity score.
    # These modules are needed only by this demo, so they are imported here.
    import csv
    import pathlib

    # Same relative location as before, but assembled from parts so it also
    # resolves on systems where '\' is not a path separator.
    census_path = pathlib.Path('..') / 'Data' / 'us-names-by-gender.csv'
    census = []
    # newline='' lets the csv module handle line endings itself.
    with open(census_path, newline='') as name_file:
        for row in csv.reader(name_file):
            if not row:
                # Blank line (e.g. at the end of the file); nothing to read.
                continue
            # Each row is unpacked as three fields; the third is not used.
            sex, name, _count = row
            if sex == 'F':
                census.append(name)
    nomenclator = Nomenclator(census)
    print(', '.join(nomenclator.names()))
    print(nomenclator.names(popularity=True))