📦 andreasjansson / language-detection.el

📄 sourceclassifier_test.rb · 32 lines
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
require 'rubygems'
require 'sourceclassifier'

# Accuracy check for SourceClassifier against the labelled snippets stored
# under test/data/stackoverflow/<language>/.
#
# For every file that gets classified, one line is printed to stdout:
#   "<expected language> <predicted language> <running accuracy in percent>"
# Files that cannot be read or classified are skipped (they do not count
# towards the running accuracy) and are reported on stderr.
s = SourceClassifier.new

test_dir = 'test/data/stackoverflow'

# Only sub-directories named after one of these languages are evaluated.
languages = [
  'css', 'c', 'java', 'javascript', 'perl', 'php', 'python', 'ruby'
]

# Directory entries that are never treated as test samples.
ignored_entries = ['.', '..', 'filenames']

total = 0
# Float so the percentage below is not truncated by integer division.
correct = 0.0

Dir.foreach(test_dir) do |lang|
  next unless languages.include?(lang)

  lang_dir = File.join(test_dir, lang)
  Dir.foreach(lang_dir) do |filename|
    next if ignored_entries.include?(filename)

    path = File.join(lang_dir, filename)
    begin
      pred = s.identify(File.read(path)).downcase
      # A 'gcc' prediction is scored as a hit for the 'c' directory.
      pred = 'c' if pred == 'gcc'
      correct += 1.0 if lang == pred
      total += 1
      score = (100 * correct / total).round
      puts "#{lang} #{pred} #{score}"
    rescue StandardError => e
      # Best effort: keep evaluating the remaining files, but make the skip
      # visible instead of silently dropping the sample from the statistics.
      warn "skipping #{path}: #{e.class}: #{e.message}"
    end
  end
end